diff --git a/.github/ISSUE_TEMPLATE/kaldi10-issue.md b/.github/ISSUE_TEMPLATE/kaldi10-issue.md
new file mode 100644
index 00000000000..5f2d11d8a0a
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/kaldi10-issue.md
@@ -0,0 +1,9 @@
+---
+name: Kaldi10 issue
+about: This option is for use by core developers only
+title: ''
+labels: kaldi10-TODO
+assignees: ''
+
+---
+
diff --git a/.gitignore b/.gitignore
index 9f219d458a4..267fdc91f5b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -151,3 +151,10 @@ GSYMS
/tools/cub-1.8.0/
/tools/cub
/tools/python/
+/tools/ngram-1.3.7.tar.gz
+/tools/ngram-1.3.7/
+
+# These CMakeLists.txt files are all generated on the fly at the moment.
+# They are listed here to avoid accidental check-in.
+/src/**/CMakeLists.txt
+/build*
diff --git a/.travis.yml b/.travis.yml
index 51e49653efc..92959f16227 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,6 +26,7 @@ addons:
branches:
only:
- master
+ - pybind11
before_install:
- cat /proc/sys/kernel/core_pattern
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000000..ededc78b8a4
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,244 @@
+cmake_minimum_required(VERSION 3.5)
+project(kaldi)
+
+set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}")
+include(GNUInstallDirs)
+include(Utils)
+include(third_party/get_third_party)
+
+# Should update cmake to a more recent version which supports FindPython3.
+find_package(PythonInterp)
+if(NOT PYTHON_EXECUTABLE OR PYTHON_VERSION_MAJOR LESS 3)
+ message(WARNING "Needs python3 to auto-generate most CMake files, but not found. "
+ "Will try `python3` directly...")
+ set(PYTHON_EXECUTABLE "python3")
+endif()
+
+message(STATUS "Running gen_cmake_skeleton.py")
+execute_process(COMMAND ${PYTHON_EXECUTABLE}
+ "${CMAKE_CURRENT_SOURCE_DIR}/cmake/gen_cmake_skeleton.py"
+ "${CMAKE_CURRENT_SOURCE_DIR}/src"
+ "--quiet"
+)
+
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_INSTALL_MESSAGE LAZY) # hide "-- Up-to-date: ..."
+if(BUILD_SHARED_LIBS)
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+ if(WIN32)
+ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
+ message(FATAL_ERROR "DLL is not supported currently")
+ elseif(APPLE)
+ set(CMAKE_INSTALL_RPATH "@loader_path")
+ else()
+ set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../lib")
+ endif()
+endif()
+
+if(APPLE)
+ # Use built-in BLAS on MacOS by default.
+ set(MATHLIB "Accelerate" CACHE STRING "OpenBLAS|MKL|Accelerate")
+else()
+ set(MATHLIB "OpenBLAS" CACHE STRING "OpenBLAS|MKL|Accelerate")
+endif()
+option(KALDI_BUILD_EXE "If disabled, will make add_kaldi_executable a no-op" ON)
+option(KALDI_BUILD_TEST "If disabled, will make add_kaldi_test_executable a no-op" ON)
+option(KALDI_USE_PATCH_NUMBER "Use MAJOR.MINOR.PATCH format, otherwise MAJOR.MINOR" OFF)
+
+if (KALDI_BUILD_TEST)
+ include(CTest)
+ enable_testing()
+endif()
+
+link_libraries(${CMAKE_DL_LIBS})
+
+find_package(Threads)
+link_libraries(Threads::Threads)
+
+if(MATHLIB STREQUAL "OpenBLAS")
+ set(BLA_VENDOR "OpenBLAS")
+ find_package(LAPACK REQUIRED)
+ add_definitions(-DHAVE_CLAPACK=1)
+ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/tools/CLAPACK)
+ link_libraries(${BLAS_LIBRARIES} ${LAPACK_LIBRARIES})
+elseif(MATHLIB STREQUAL "MKL")
+ set(BLA_VENDOR "Intel10_64lp")
+ # find_package(BLAS REQUIRED)
+ normalize_env_path(ENV{MKLROOT})
+ find_package(LAPACK REQUIRED)
+ add_definitions(-DHAVE_MKL=1)
+  include_directories($ENV{MKLROOT}/include) # TODO: avoid relying on the environment; find_package does not propagate include dirs...
+ link_libraries(${BLAS_LIBRARIES} ${LAPACK_LIBRARIES})
+elseif(MATHLIB STREQUAL "Accelerate")
+ execute_process(COMMAND sw_vers -productVersion
+ OUTPUT_VARIABLE MACOS_VERSION)
+ if(MACOS_VERSION VERSION_LESS "10.12" AND MACOS_VERSION VERSION_GREATER_EQUAL "10.11")
+ message(WARNING
+ "**BAD WARNING**: You are using OS X El Capitan. Some versions of this OS"
+ " have a bug in the BLAS implementation that affects Kaldi."
+ " After compiling, cd to matrix/ and type 'make test'. The"
+ " test will fail if the problem exists in your version."
+ " Eventually this issue will be fixed by system updates from"
+ " Apple. Unexplained crashes with reports of NaNs will"
+ " be caused by this bug, but some recipes will (sometimes) work."
+ )
+ endif()
+ set(BLA_VENDOR "Apple")
+ find_package(BLAS REQUIRED)
+ find_package(LAPACK REQUIRED)
+ add_definitions(-DHAVE_CLAPACK=1)
+ link_libraries(${BLAS_LIBRARIES} ${LAPACK_LIBRARIES})
+else()
+ message(FATAL_ERROR "${MATHLIB} is not tested and supported, you are on your own now.")
+endif()
+
+if(MSVC)
+  # Added in source, but ideally this should be handled by the build script instead.
+ # add_definitions(-DWIN32_LEAN_AND_MEAN=1)
+
+ add_compile_options(/permissive- /FS /wd4819 /EHsc /bigobj)
+
+ # some warnings related with fst
+ add_compile_options(/wd4018 /wd4244 /wd4267 /wd4291 /wd4305)
+
+ set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
+ if(NOT DEFINED ENV{CUDAHOSTCXX})
+ set(ENV{CUDAHOSTCXX} ${CMAKE_CXX_COMPILER})
+ endif()
+ if(NOT DEFINED CUDA_HOST_COMPILER)
+ set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
+ endif()
+endif()
+
+find_package(CUDA)
+if(CUDA_FOUND)
+ set(CUB_ROOT_DIR "${PROJECT_SOURCE_DIR}/tools/cub")
+
+ set(CUDA_PROPAGATE_HOST_FLAGS ON)
+ set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread;-std=c++${CMAKE_CXX_STANDARD}")
+ if(MSVC)
+ list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler /permissive-,/FS,/wd4819,/EHsc,/bigobj")
+ list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler /wd4018,/wd4244,/wd4267,/wd4291,/wd4305")
+ if(BUILD_SHARED_LIBS)
+ list(APPEND CUDA_NVCC_FLAGS_RELEASE -Xcompiler /MD)
+ list(APPEND CUDA_NVCC_FLAGS_DEBUG -Xcompiler /MDd)
+ endif()
+ else()
+ # list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -std=c++${CMAKE_CXX_STANDARD}")
+ list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
+ endif()
+ set(CUDA_NVCC_FLAGS ${KALDI_CUDA_NVCC_FLAGS} ${CUDA_NVCC_FLAGS})
+
+ add_definitions(-DHAVE_CUDA=1)
+ add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM=1)
+ include_directories(${CUDA_INCLUDE_DIRS})
+ link_libraries(
+ ${CUDA_LIBRARIES}
+ ${CUDA_CUDA_LIBRARY}
+ ${CUDA_CUBLAS_LIBRARIES}
+ ${CUDA_CUFFT_LIBRARIES}
+ ${CUDA_curand_LIBRARY}
+ ${CUDA_cusolver_LIBRARY}
+ ${CUDA_cusparse_LIBRARY})
+
+ find_package(NvToolExt REQUIRED)
+ include_directories(${NvToolExt_INCLUDE_DIR})
+ link_libraries(${NvToolExt_LIBRARIES})
+
+ find_package(CUB REQUIRED)
+ include_directories(${CUB_INCLUDE_DIR})
+endif()
+
+add_definitions(-DKALDI_NO_PORTAUDIO=1)
+
+include(VersionHelper)
+get_version() # this will set KALDI_VERSION and KALDI_PATCH_NUMBER
+if(${KALDI_USE_PATCH_NUMBER})
+ set(KALDI_VERSION "${KALDI_VERSION}.${KALDI_PATCH_NUMBER}")
+endif()
+
+get_third_party(openfst)
+set(OPENFST_ROOT_DIR ${CMAKE_BINARY_DIR}/openfst)
+include(third_party/openfst_lib_target)
+link_libraries(fst)
+
+# add all native libraries
+add_subdirectory(src/base) # NOTE, we need to patch the target with version from outside
+set_property(TARGET kaldi-base PROPERTY COMPILE_DEFINITIONS "KALDI_VERSION=\"${KALDI_VERSION}\"")
+add_subdirectory(src/matrix)
+add_subdirectory(src/cudamatrix)
+add_subdirectory(src/util)
+add_subdirectory(src/feat)
+add_subdirectory(src/tree)
+add_subdirectory(src/gmm)
+add_subdirectory(src/transform)
+add_subdirectory(src/sgmm2)
+add_subdirectory(src/fstext)
+add_subdirectory(src/hmm)
+add_subdirectory(src/lm)
+add_subdirectory(src/decoder)
+add_subdirectory(src/lat)
+add_subdirectory(src/nnet)
+add_subdirectory(src/nnet2)
+add_subdirectory(src/nnet3)
+add_subdirectory(src/rnnlm)
+add_subdirectory(src/chain)
+add_subdirectory(src/ivector)
+add_subdirectory(src/online)
+add_subdirectory(src/online2)
+add_subdirectory(src/kws)
+
+add_subdirectory(src/itf)
+
+if(TENSORFLOW_DIR)
+ add_subdirectory(src/tfrnnlm)
+ add_subdirectory(src/tfrnnlmbin)
+endif()
+
+# add all cuda libraries
+if(CUDA_FOUND)
+ add_subdirectory(src/cudafeat)
+ add_subdirectory(src/cudadecoder)
+endif()
+
+# add all native executables
+add_subdirectory(src/bin)
+add_subdirectory(src/gmmbin)
+add_subdirectory(src/featbin)
+add_subdirectory(src/sgmm2bin)
+add_subdirectory(src/fstbin)
+add_subdirectory(src/lmbin)
+add_subdirectory(src/latbin)
+add_subdirectory(src/nnetbin)
+add_subdirectory(src/nnet2bin)
+add_subdirectory(src/nnet3bin)
+add_subdirectory(src/rnnlmbin)
+add_subdirectory(src/chainbin)
+add_subdirectory(src/ivectorbin)
+add_subdirectory(src/onlinebin)
+add_subdirectory(src/online2bin)
+add_subdirectory(src/kwsbin)
+
+# add all cuda executables
+if(CUDA_FOUND)
+ add_subdirectory(src/cudafeatbin)
+ add_subdirectory(src/cudadecoderbin)
+endif()
+
+include(CMakePackageConfigHelpers)
+# maybe we should put this into subfolder?
+configure_package_config_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/kaldi-config.cmake.in
+ ${CMAKE_BINARY_DIR}/cmake/kaldi-config.cmake
+ INSTALL_DESTINATION lib/cmake/kaldi
+)
+write_basic_package_version_file(
+ ${CMAKE_BINARY_DIR}/cmake/kaldi-config-version.cmake
+ VERSION ${KALDI_VERSION}
+ COMPATIBILITY AnyNewerVersion
+)
+install(FILES ${CMAKE_BINARY_DIR}/cmake/kaldi-config.cmake ${CMAKE_BINARY_DIR}/cmake/kaldi-config-version.cmake
+ DESTINATION lib/cmake/kaldi
+)
+install(EXPORT kaldi-targets DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/cmake/kaldi)
diff --git a/INSTALL b/INSTALL
index 2dbf318118c..7beb79a7336 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,9 +1,16 @@
This is the official Kaldi INSTALL. Look also at INSTALL.md for the git mirror installation.
-[for native Windows install, see windows/INSTALL]
+[Option 1 below does not apply to a native Windows install; see windows/INSTALL or Option 2 below]
-(1)
-go to tools/ and follow INSTALL instructions there.
+Option 1 (bash + makefile):
-(2)
-go to src/ and follow INSTALL instructions there.
+ Steps:
+ (1)
+ go to tools/ and follow INSTALL instructions there.
+ (2)
+ go to src/ and follow INSTALL instructions there.
+
+Option 2 (cmake):
+
+ Go to cmake/ and follow INSTALL.md instructions there.
+  Note: it may not be well tested, and some features are currently missing.
diff --git a/cmake/FindBLAS.cmake b/cmake/FindBLAS.cmake
new file mode 100644
index 00000000000..67676110c6d
--- /dev/null
+++ b/cmake/FindBLAS.cmake
@@ -0,0 +1,816 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindBLAS
+--------
+
+Find Basic Linear Algebra Subprograms (BLAS) library
+
+This module finds an installed Fortran library that implements the
+BLAS linear-algebra interface (see http://www.netlib.org/blas/). The
+list of libraries searched for is taken from the ``autoconf`` macro file,
+``acx_blas.m4`` (distributed at
+http://ac-archive.sourceforge.net/ac-archive/acx_blas.html).
+
+Input Variables
+^^^^^^^^^^^^^^^
+
+The following variables may be set to influence this module's behavior:
+
+``BLA_STATIC``
+ if ``ON`` use static linkage
+
+``BLA_VENDOR``
+ If set, checks only the specified vendor, if not set checks all the
+ possibilities. List of vendors valid in this module:
+
+ * Goto
+ * OpenBLAS
+ * FLAME
+ * ATLAS PhiPACK
+ * CXML
+ * DXML
+ * SunPerf
+ * SCSL
+ * SGIMATH
+ * IBMESSL
+ * Intel10_32 (intel mkl v10 32 bit)
+ * Intel10_64lp (intel mkl v10+ 64 bit, threaded code, lp64 model)
+ * Intel10_64lp_seq (intel mkl v10+ 64 bit, sequential code, lp64 model)
+ * Intel10_64ilp (intel mkl v10+ 64 bit, threaded code, ilp64 model)
+ * Intel10_64ilp_seq (intel mkl v10+ 64 bit, sequential code, ilp64 model)
+ * Intel (obsolete versions of mkl 32 and 64 bit)
+ * ACML
+ * ACML_MP
+ * ACML_GPU
+ * Apple
+ * NAS
+ * Generic
+
+``BLA_F95``
+ if ``ON`` tries to find the BLAS95 interfaces
+
+``BLA_PREFER_PKGCONFIG``
+ if set ``pkg-config`` will be used to search for a BLAS library first
+ and if one is found that is preferred
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This module defines the following variables:
+
+``BLAS_FOUND``
+ library implementing the BLAS interface is found
+``BLAS_LINKER_FLAGS``
+ uncached list of required linker flags (excluding ``-l`` and ``-L``).
+``BLAS_LIBRARIES``
+ uncached list of libraries (using full path name) to link against
+ to use BLAS (may be empty if compiler implicitly links BLAS)
+``BLAS95_LIBRARIES``
+ uncached list of libraries (using full path name) to link against
+ to use BLAS95 interface
+``BLAS95_FOUND``
+ library implementing the BLAS95 interface is found
+
+.. note::
+
+ C or CXX must be enabled to use Intel Math Kernel Library (MKL)
+
+ For example, to use Intel MKL libraries and/or Intel compiler:
+
+ .. code-block:: cmake
+
+ set(BLA_VENDOR Intel10_64lp)
+ find_package(BLAS)
+
+Hints
+^^^^^
+
+Set ``MKLROOT`` environment variable to a directory that contains an MKL
+installation.
+
+#]=======================================================================]
+
+include(CheckFunctionExists)
+include(CheckFortranFunctionExists)
+include(CMakePushCheckState)
+include(FindPackageHandleStandardArgs)
+cmake_push_check_state()
+set(CMAKE_REQUIRED_QUIET ${BLAS_FIND_QUIETLY})
+
+set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
+
+# Check the language being used
+if( NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED) )
+ if(BLAS_FIND_REQUIRED)
+ message(FATAL_ERROR "FindBLAS requires Fortran, C, or C++ to be enabled.")
+ else()
+ message(STATUS "Looking for BLAS... - NOT found (Unsupported languages)")
+ return()
+ endif()
+endif()
+
+if(BLA_PREFER_PKGCONFIG)
+ find_package(PkgConfig)
+ pkg_check_modules(PKGC_BLAS blas)
+ if(PKGC_BLAS_FOUND)
+ set(BLAS_FOUND ${PKGC_BLAS_FOUND})
+ set(BLAS_LIBRARIES "${PKGC_BLAS_LINK_LIBRARIES}")
+ return()
+ endif()
+endif()
+
+macro(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list _thread)
+ # This macro checks for the existence of the combination of fortran libraries
+ # given by _list. If the combination is found, this macro checks (using the
+ # Check_Fortran_Function_Exists macro) whether can link against that library
+ # combination using the name of a routine given by _name using the linker
+ # flags given by _flags. If the combination of libraries is found and passes
+ # the link test, LIBRARIES is set to the list of complete library paths that
+ # have been found. Otherwise, LIBRARIES is set to FALSE.
+
+ # N.B. _prefix is the prefix applied to the names of all cached variables that
+ # are generated internally and marked advanced by this macro.
+
+ set(_libdir ${ARGN})
+
+ set(_libraries_work TRUE)
+ set(${LIBRARIES})
+ set(_combined_name)
+ if (NOT _libdir)
+ if (WIN32)
+ set(_libdir ENV LIB)
+ elseif (APPLE)
+ set(_libdir ENV DYLD_LIBRARY_PATH)
+ else ()
+ set(_libdir ENV LD_LIBRARY_PATH)
+ endif ()
+ endif ()
+
+ list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+
+ foreach(_library ${_list})
+ set(_combined_name ${_combined_name}_${_library})
+ if(NOT "${_thread}" STREQUAL "")
+ set(_combined_name ${_combined_name}_thread)
+ endif()
+ if(_libraries_work)
+ if (BLA_STATIC)
+ if (WIN32)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ endif ()
+ if (APPLE)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ else ()
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ endif ()
+ else ()
+ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ # for ubuntu's libblas3gf and liblapack3gf packages
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf)
+ endif ()
+ endif ()
+ find_library(${_prefix}_${_library}_LIBRARY
+ NAMES ${_library}
+ PATHS ${_libdir}
+ )
+ mark_as_advanced(${_prefix}_${_library}_LIBRARY)
+ set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})
+ set(_libraries_work ${${_prefix}_${_library}_LIBRARY})
+ endif()
+ endforeach()
+ if(_libraries_work)
+ # Test this combination of libraries.
+ set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_thread})
+ # message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}")
+ if (CMAKE_Fortran_COMPILER_LOADED)
+ check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS)
+ else()
+ check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS)
+ endif()
+ set(CMAKE_REQUIRED_LIBRARIES)
+ set(_libraries_work ${${_prefix}${_combined_name}_WORKS})
+ endif()
+ if(_libraries_work)
+ if("${_list}" STREQUAL "")
+ set(${LIBRARIES} "${LIBRARIES}-PLACEHOLDER-FOR-EMPTY-LIBRARIES")
+ else()
+ set(${LIBRARIES} ${${LIBRARIES}} ${_thread}) # for static link
+ endif()
+ else()
+ set(${LIBRARIES} FALSE)
+ endif()
+ #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}")
+endmacro()
+
+set(BLAS_LINKER_FLAGS)
+set(BLAS_LIBRARIES)
+set(BLAS95_LIBRARIES)
+if (NOT $ENV{BLA_VENDOR} STREQUAL "")
+ set(BLA_VENDOR $ENV{BLA_VENDOR})
+else ()
+ if(NOT BLA_VENDOR)
+ set(BLA_VENDOR "All")
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # Implicitly linked BLAS libraries
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ ""
+ ""
+ )
+ endif()
+endif ()
+
+#BLAS in intel mkl 10+ library? (em64t 64bit)
+if (BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All")
+ if (NOT BLAS_LIBRARIES)
+
+ # System-specific settings
+ if (WIN32)
+ if (BLA_STATIC)
+ set(BLAS_mkl_DLL_SUFFIX "")
+ else()
+ set(BLAS_mkl_DLL_SUFFIX "_dll")
+ endif()
+ else()
+ # Switch to GNU Fortran support layer if needed (but not on Apple, where MKL does not provide it)
+ if(CMAKE_Fortran_COMPILER_LOADED AND CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" AND NOT APPLE)
+ set(BLAS_mkl_INTFACE "gf")
+ set(BLAS_mkl_THREADING "gnu")
+ set(BLAS_mkl_OMP "gomp")
+ else()
+ set(BLAS_mkl_INTFACE "intel")
+ set(BLAS_mkl_THREADING "intel")
+ set(BLAS_mkl_OMP "iomp5")
+ endif()
+ set(BLAS_mkl_LM "-lm")
+ set(BLAS_mkl_LDL "-ldl")
+ endif()
+
+ if (BLA_VENDOR MATCHES "_64ilp")
+ set(BLAS_mkl_ILP_MODE "ilp64")
+ else ()
+ set(BLAS_mkl_ILP_MODE "lp64")
+ endif ()
+
+ if (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)
+ if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED)
+ find_package(Threads)
+ else()
+ find_package(Threads REQUIRED)
+ endif()
+
+ set(BLAS_SEARCH_LIBS "")
+
+ if(BLA_F95)
+ set(BLAS_mkl_SEARCH_SYMBOL sgemm_f95)
+ set(_LIBRARIES BLAS95_LIBRARIES)
+ if (WIN32)
+ # Find the main file (32-bit or 64-bit)
+ set(BLAS_SEARCH_LIBS_WIN_MAIN "")
+ if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN
+ "mkl_blas95${BLAS_mkl_DLL_SUFFIX} mkl_intel_c${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN
+ "mkl_blas95_${BLAS_mkl_ILP_MODE}${BLAS_mkl_DLL_SUFFIX} mkl_intel_${BLAS_mkl_ILP_MODE}${BLAS_mkl_DLL_SUFFIX}")
+ endif ()
+
+ # Add threading/sequential libs
+ set(BLAS_SEARCH_LIBS_WIN_THREAD "")
+ if (BLA_VENDOR MATCHES "_seq$" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "mkl_sequential${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+ if (NOT BLA_VENDOR MATCHES "_seq$" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}")
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+
+ # Cartesian product of the above
+ foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN})
+ foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD})
+ list(APPEND BLAS_SEARCH_LIBS
+ "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}")
+ endforeach()
+ endforeach()
+ else ()
+ if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95 mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide")
+
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95 mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_OMP}")
+ endif ()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95 mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide")
+
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_OMP}")
+ endif ()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_sequential mkl_core")
+ endif ()
+ endif ()
+ else ()
+ set(BLAS_mkl_SEARCH_SYMBOL sgemm)
+ set(_LIBRARIES BLAS_LIBRARIES)
+ if (WIN32)
+ # Find the main file (32-bit or 64-bit)
+ set(BLAS_SEARCH_LIBS_WIN_MAIN "")
+ if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN
+ "mkl_intel_c${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN
+ "mkl_intel_${BLAS_mkl_ILP_MODE}${BLAS_mkl_DLL_SUFFIX}")
+ endif ()
+
+ # Add threading/sequential libs
+ set(BLAS_SEARCH_LIBS_WIN_THREAD "")
+ if (NOT BLA_VENDOR MATCHES "_seq$" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}")
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+ if (BLA_VENDOR MATCHES "_seq$" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "mkl_sequential${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+
+ # Cartesian product of the above
+ foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN})
+ foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD})
+ list(APPEND BLAS_SEARCH_LIBS
+ "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}")
+ endforeach()
+ endforeach()
+ else ()
+ if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide")
+
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_OMP}")
+ endif ()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide")
+
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_OMP}")
+ endif ()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_sequential mkl_core")
+ endif ()
+
+      # older versions of intel mkl libs
+ if (BLA_VENDOR STREQUAL "Intel" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_ia32")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_em64t")
+ endif ()
+ endif ()
+ endif ()
+
+ if (DEFINED ENV{MKLROOT})
+ if (BLA_VENDOR STREQUAL "Intel10_32")
+ set(_BLAS_MKLROOT_LIB_DIR "$ENV{MKLROOT}/lib/ia32")
+ elseif (BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$")
+ set(_BLAS_MKLROOT_LIB_DIR "$ENV{MKLROOT}/lib/intel64")
+ endif ()
+ endif ()
+ if (_BLAS_MKLROOT_LIB_DIR)
+ if (WIN32)
+ string(APPEND _BLAS_MKLROOT_LIB_DIR "_win")
+ elseif (APPLE)
+ string(APPEND _BLAS_MKLROOT_LIB_DIR "_mac")
+ else ()
+ string(APPEND _BLAS_MKLROOT_LIB_DIR "_lin")
+ endif ()
+ endif ()
+
+ foreach (IT ${BLAS_SEARCH_LIBS})
+ string(REPLACE " " ";" SEARCH_LIBS ${IT})
+ if (NOT ${_LIBRARIES})
+ check_fortran_libraries(
+ ${_LIBRARIES}
+ BLAS
+ ${BLAS_mkl_SEARCH_SYMBOL}
+ ""
+ "${SEARCH_LIBS}"
+ "${CMAKE_THREAD_LIBS_INIT};${BLAS_mkl_LM};${BLAS_mkl_LDL}"
+ "${_BLAS_MKLROOT_LIB_DIR}"
+ )
+ endif ()
+ endforeach ()
+
+ endif ()
+ unset(BLAS_mkl_ILP_MODE)
+ unset(BLAS_mkl_INTFACE)
+ unset(BLAS_mkl_THREADING)
+ unset(BLAS_mkl_OMP)
+ unset(BLAS_mkl_DLL_SUFFIX)
+ unset(BLAS_mkl_LM)
+ unset(BLAS_mkl_LDL)
+ endif ()
+endif ()
+
+if(BLA_F95)
+ find_package_handle_standard_args(BLAS REQUIRED_VARS BLAS95_LIBRARIES)
+ set(BLAS95_FOUND ${BLAS_FOUND})
+ if(BLAS_FOUND)
+ set(BLAS_LIBRARIES "${BLAS95_LIBRARIES}")
+ endif()
+endif()
+
+if (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # gotoblas (http://www.tacc.utexas.edu/tacc-projects/gotoblas2)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "goto2"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # OpenBLAS (http://www.openblas.net)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "openblas"
+ ""
+ )
+ endif()
+ if(NOT BLAS_LIBRARIES)
+ find_package(Threads)
+ # OpenBLAS (http://www.openblas.net)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "openblas"
+ "${CMAKE_THREAD_LIBS_INIT}"
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "FLAME" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # FLAME's blis library (https://github.com/flame/blis)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "blis"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ dgemm
+ ""
+ "f77blas;atlas"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in PhiPACK libraries? (requires generic BLAS lib, too)
+if (BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "sgemm;dgemm;blas"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in Alpha CXML library?
+if (BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "cxml"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in Alpha DXML library? (now called CXML, see above)
+if (BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "dxml"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in Sun Performance library?
+if (BLA_VENDOR STREQUAL "SunPerf" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ "-xlic_lib=sunperf"
+ "sunperf;sunmath"
+ ""
+ )
+ if(BLAS_LIBRARIES)
+ set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf")
+ endif()
+ endif()
+endif ()
+
+# BLAS in SCSL library? (SGI/Cray Scientific Library)
+if (BLA_VENDOR STREQUAL "SCSL" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "scsl"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in SGIMATH library?
+if (BLA_VENDOR STREQUAL "SGIMATH" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "complib.sgimath"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in IBM ESSL library? (requires generic BLAS lib, too)
+if (BLA_VENDOR STREQUAL "IBMESSL" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "essl;blas"
+ ""
+ )
+ endif()
+endif ()
+
+#BLAS in acml library?
+if (BLA_VENDOR MATCHES "ACML" OR BLA_VENDOR STREQUAL "All")
+ if( ((BLA_VENDOR STREQUAL "ACML") AND (NOT BLAS_ACML_LIB_DIRS)) OR
+ ((BLA_VENDOR STREQUAL "ACML_MP") AND (NOT BLAS_ACML_MP_LIB_DIRS)) OR
+ ((BLA_VENDOR STREQUAL "ACML_GPU") AND (NOT BLAS_ACML_GPU_LIB_DIRS))
+ )
+ # try to find acml in "standard" paths
+ if( WIN32 )
+ file( GLOB _ACML_ROOT "C:/AMD/acml*/ACML-EULA.txt" )
+ else()
+ file( GLOB _ACML_ROOT "/opt/acml*/ACML-EULA.txt" )
+ endif()
+ if( WIN32 )
+ file( GLOB _ACML_GPU_ROOT "C:/AMD/acml*/GPGPUexamples" )
+ else()
+ file( GLOB _ACML_GPU_ROOT "/opt/acml*/GPGPUexamples" )
+ endif()
+ list(GET _ACML_ROOT 0 _ACML_ROOT)
+ list(GET _ACML_GPU_ROOT 0 _ACML_GPU_ROOT)
+ if( _ACML_ROOT )
+ get_filename_component( _ACML_ROOT ${_ACML_ROOT} PATH )
+ if( SIZEOF_INTEGER EQUAL 8 )
+ set( _ACML_PATH_SUFFIX "_int64" )
+ else()
+ set( _ACML_PATH_SUFFIX "" )
+ endif()
+ if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" )
+ set( _ACML_COMPILER32 "ifort32" )
+ set( _ACML_COMPILER64 "ifort64" )
+ elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "SunPro" )
+ set( _ACML_COMPILER32 "sun32" )
+ set( _ACML_COMPILER64 "sun64" )
+ elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
+ set( _ACML_COMPILER32 "pgi32" )
+ if( WIN32 )
+ set( _ACML_COMPILER64 "win64" )
+ else()
+ set( _ACML_COMPILER64 "pgi64" )
+ endif()
+ elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "Open64" )
+ # 32 bit builds not supported on Open64 but for code simplicity
+ # We'll just use the same directory twice
+ set( _ACML_COMPILER32 "open64_64" )
+ set( _ACML_COMPILER64 "open64_64" )
+ elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "NAG" )
+ set( _ACML_COMPILER32 "nag32" )
+ set( _ACML_COMPILER64 "nag64" )
+ else()
+ set( _ACML_COMPILER32 "gfortran32" )
+ set( _ACML_COMPILER64 "gfortran64" )
+ endif()
+
+ if( BLA_VENDOR STREQUAL "ACML_MP" )
+ set(_ACML_MP_LIB_DIRS
+ "${_ACML_ROOT}/${_ACML_COMPILER32}_mp${_ACML_PATH_SUFFIX}/lib"
+ "${_ACML_ROOT}/${_ACML_COMPILER64}_mp${_ACML_PATH_SUFFIX}/lib" )
+ else()
+ set(_ACML_LIB_DIRS
+ "${_ACML_ROOT}/${_ACML_COMPILER32}${_ACML_PATH_SUFFIX}/lib"
+ "${_ACML_ROOT}/${_ACML_COMPILER64}${_ACML_PATH_SUFFIX}/lib" )
+ endif()
+ endif()
+elseif(BLAS_${BLA_VENDOR}_LIB_DIRS)
+ set(_${BLA_VENDOR}_LIB_DIRS ${BLAS_${BLA_VENDOR}_LIB_DIRS})
+endif()
+
+if( BLA_VENDOR STREQUAL "ACML_MP" )
+ foreach( BLAS_ACML_MP_LIB_DIRS ${_ACML_MP_LIB_DIRS})
+ check_fortran_libraries (
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ "" "acml_mp;acml_mv" "" ${BLAS_ACML_MP_LIB_DIRS}
+ )
+ if( BLAS_LIBRARIES )
+ break()
+ endif()
+ endforeach()
+elseif( BLA_VENDOR STREQUAL "ACML_GPU" )
+ foreach( BLAS_ACML_GPU_LIB_DIRS ${_ACML_GPU_LIB_DIRS})
+ check_fortran_libraries (
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ "" "acml;acml_mv;CALBLAS" "" ${BLAS_ACML_GPU_LIB_DIRS}
+ )
+ if( BLAS_LIBRARIES )
+ break()
+ endif()
+ endforeach()
+else()
+ foreach( BLAS_ACML_LIB_DIRS ${_ACML_LIB_DIRS} )
+ check_fortran_libraries (
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ "" "acml;acml_mv" "" ${BLAS_ACML_LIB_DIRS}
+ )
+ if( BLAS_LIBRARIES )
+ break()
+ endif()
+ endforeach()
+endif()
+
+# Either acml or acml_mp should be in LD_LIBRARY_PATH but not both
+if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "acml;acml_mv"
+ ""
+ )
+endif()
+if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "acml_mp;acml_mv"
+ ""
+ )
+endif()
+if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "acml;acml_mv;CALBLAS"
+ ""
+ )
+endif()
+endif () # ACML
+
+# Apple BLAS library?
+if (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ dgemm
+ ""
+ "Accelerate"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All")
+ if ( NOT BLAS_LIBRARIES )
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ dgemm
+ ""
+ "vecLib"
+ ""
+ )
+ endif ()
+endif ()
+
+# Generic BLAS library?
+if (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "blas"
+ ""
+ )
+ endif()
+endif ()
+
+if(NOT BLA_F95)
+ find_package_handle_standard_args(BLAS REQUIRED_VARS BLAS_LIBRARIES)
+endif()
+
+# On compilers that implicitly link BLAS (such as ftn, cc, and CC on Cray HPC machines)
+# we used a placeholder for empty BLAS_LIBRARIES to get through our logic above.
+if (BLAS_LIBRARIES STREQUAL "BLAS_LIBRARIES-PLACEHOLDER-FOR-EMPTY-LIBRARIES")
+ set(BLAS_LIBRARIES "")
+endif()
+
+cmake_pop_check_state()
+set(CMAKE_FIND_LIBRARY_SUFFIXES ${_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
diff --git a/cmake/FindCUB.cmake b/cmake/FindCUB.cmake
new file mode 100644
index 00000000000..33c8a926f97
--- /dev/null
+++ b/cmake/FindCUB.cmake
@@ -0,0 +1,25 @@
+# Try to find the CUB library and headers.
+# CUB_ROOT_DIR - where to look for CUB
+
+# CUB_FOUND - system has CUB
+# CUB_INCLUDE_DIRS - the CUB include directory
+
+
+find_path(CUB_INCLUDE_DIR
+ NAMES cub/cub.cuh
+ HINTS ${CUB_ROOT_DIR}
+ DOC "The directory where CUB includes reside"
+)
+
+set(CUB_INCLUDE_DIRS ${CUB_INCLUDE_DIR})
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CUB
+ FOUND_VAR CUB_FOUND
+ REQUIRED_VARS CUB_INCLUDE_DIR
+)
+
+mark_as_advanced(CUB_FOUND)
+
+add_library(CUB INTERFACE)
+target_include_directories(CUB INTERFACE ${CUB_INCLUDE_DIR})
diff --git a/cmake/FindICU.cmake b/cmake/FindICU.cmake
new file mode 100644
index 00000000000..8c460082c36
--- /dev/null
+++ b/cmake/FindICU.cmake
@@ -0,0 +1,428 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindICU
+-------
+
+Find the International Components for Unicode (ICU) libraries and
+programs.
+
+This module supports multiple components.
+Components can include any of: ``data``, ``i18n``, ``io``, ``le``,
+``lx``, ``test``, ``tu`` and ``uc``.
+
+Note that on Windows ``data`` is named ``dt`` and ``i18n`` is named
+``in``; any of the names may be used, and the appropriate
+platform-specific library name will be automatically selected.
+
+This module reports information about the ICU installation in
+several variables. General variables::
+
+ ICU_VERSION - ICU release version
+ ICU_FOUND - true if the main programs and libraries were found
+ ICU_LIBRARIES - component libraries to be linked
+ ICU_INCLUDE_DIRS - the directories containing the ICU headers
+
+Imported targets::
+
+  ICU::<C>
+
+Where ``<C>`` is the name of an ICU component, for example
+``ICU::i18n``.
+
+ICU programs are reported in::
+
+ ICU_GENCNVAL_EXECUTABLE - path to gencnval executable
+ ICU_ICUINFO_EXECUTABLE - path to icuinfo executable
+ ICU_GENBRK_EXECUTABLE - path to genbrk executable
+ ICU_ICU-CONFIG_EXECUTABLE - path to icu-config executable
+ ICU_GENRB_EXECUTABLE - path to genrb executable
+ ICU_GENDICT_EXECUTABLE - path to gendict executable
+ ICU_DERB_EXECUTABLE - path to derb executable
+ ICU_PKGDATA_EXECUTABLE - path to pkgdata executable
+ ICU_UCONV_EXECUTABLE - path to uconv executable
+ ICU_GENCFU_EXECUTABLE - path to gencfu executable
+ ICU_MAKECONV_EXECUTABLE - path to makeconv executable
+ ICU_GENNORM2_EXECUTABLE - path to gennorm2 executable
+ ICU_GENCCODE_EXECUTABLE - path to genccode executable
+ ICU_GENSPREP_EXECUTABLE - path to gensprep executable
+ ICU_ICUPKG_EXECUTABLE - path to icupkg executable
+ ICU_GENCMN_EXECUTABLE - path to gencmn executable
+
+ICU component libraries are reported in::
+
+  ICU_<C>_FOUND - ON if component was found
+  ICU_<C>_LIBRARIES - libraries for component
+
+ICU datafiles are reported in::
+
+ ICU_MAKEFILE_INC - Makefile.inc
+ ICU_PKGDATA_INC - pkgdata.inc
+
+Note that ``<C>`` is the uppercased name of the component.
+
+This module reads hints about search results from::
+
+ ICU_ROOT - the root of the ICU installation
+
+The environment variable ``ICU_ROOT`` may also be used; the
+ICU_ROOT variable takes precedence.
+
+The following cache variables may also be set::
+
+  ICU_<P>_EXECUTABLE - the path to the <P> executable
+  ICU_INCLUDE_DIR - the directory containing the ICU headers
+  ICU_<C>_LIBRARY - the library for the <C> component
+
+.. note::
+
+ In most cases none of the above variables will require setting,
+ unless multiple ICU versions are available and a specific version
+ is required.
+
+Other variables one may set to control this module are::
+
+ ICU_DEBUG - Set to ON to enable debug output from FindICU.
+#]=======================================================================]
+
+# Written by Roger Leigh
+
+set(icu_programs
+ gencnval
+ icuinfo
+ genbrk
+ icu-config
+ genrb
+ gendict
+ derb
+ pkgdata
+ uconv
+ gencfu
+ makeconv
+ gennorm2
+ genccode
+ gensprep
+ icupkg
+ gencmn)
+
+set(icu_data
+ Makefile.inc
+ pkgdata.inc)
+
+# The ICU checks are contained in a function due to the large number
+# of temporary variables needed.
+function(_ICU_FIND)
+ # Set up search paths, taking compiler into account. Search ICU_ROOT,
+ # with ICU_ROOT in the environment as a fallback if unset.
+ if(ICU_ROOT)
+ list(APPEND icu_roots "${ICU_ROOT}")
+ else()
+ if(NOT "$ENV{ICU_ROOT}" STREQUAL "")
+ file(TO_CMAKE_PATH "$ENV{ICU_ROOT}" NATIVE_PATH)
+ list(APPEND icu_roots "${NATIVE_PATH}")
+ set(ICU_ROOT "${NATIVE_PATH}"
+ CACHE PATH "Location of the ICU installation" FORCE)
+ endif()
+ endif()
+
+ # Find include directory
+ list(APPEND icu_include_suffixes "include")
+ find_path(ICU_INCLUDE_DIR
+ NAMES "unicode/utypes.h"
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_include_suffixes}
+ DOC "ICU include directory")
+ set(ICU_INCLUDE_DIR "${ICU_INCLUDE_DIR}" PARENT_SCOPE)
+
+ # Get version
+ if(ICU_INCLUDE_DIR AND EXISTS "${ICU_INCLUDE_DIR}/unicode/uvernum.h")
+ file(STRINGS "${ICU_INCLUDE_DIR}/unicode/uvernum.h" icu_header_str
+ REGEX "^#define[\t ]+U_ICU_VERSION[\t ]+\".*\".*")
+
+ string(REGEX REPLACE "^#define[\t ]+U_ICU_VERSION[\t ]+\"([^ \\n]*)\".*"
+ "\\1" icu_version_string "${icu_header_str}")
+ set(ICU_VERSION "${icu_version_string}")
+ set(ICU_VERSION "${icu_version_string}" PARENT_SCOPE)
+ unset(icu_header_str)
+ unset(icu_version_string)
+ endif()
+
+ if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ # 64-bit binary directory
+ set(_bin64 "bin64")
+ # 64-bit library directory
+ set(_lib64 "lib64")
+ endif()
+
+
+ # Find all ICU programs
+ list(APPEND icu_binary_suffixes "${_bin64}" "bin" "sbin")
+ foreach(program ${icu_programs})
+ string(TOUPPER "${program}" program_upcase)
+ set(cache_var "ICU_${program_upcase}_EXECUTABLE")
+ set(program_var "ICU_${program_upcase}_EXECUTABLE")
+ find_program("${cache_var}"
+ NAMES "${program}"
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_binary_suffixes}
+ DOC "ICU ${program} executable"
+ NO_PACKAGE_ROOT_PATH
+ )
+ mark_as_advanced(cache_var)
+ set("${program_var}" "${${cache_var}}" PARENT_SCOPE)
+ endforeach()
+
+ # Find all ICU libraries
+ list(APPEND icu_library_suffixes "${_lib64}" "lib")
+ set(ICU_REQUIRED_LIBS_FOUND ON)
+ set(static_prefix )
+ # static icu libraries compiled with MSVC have the prefix 's'
+ if(MSVC)
+ set(static_prefix "s")
+ endif()
+ foreach(component ${ICU_FIND_COMPONENTS})
+ string(TOUPPER "${component}" component_upcase)
+ set(component_cache "ICU_${component_upcase}_LIBRARY")
+ set(component_cache_release "${component_cache}_RELEASE")
+ set(component_cache_debug "${component_cache}_DEBUG")
+ set(component_found "${component_upcase}_FOUND")
+ set(component_libnames "icu${component}")
+ set(component_debug_libnames "icu${component}d")
+
+ # Special case deliberate library naming mismatches between Unix
+ # and Windows builds
+ unset(component_libnames)
+ unset(component_debug_libnames)
+ list(APPEND component_libnames "icu${component}")
+ list(APPEND component_debug_libnames "icu${component}d")
+ if(component STREQUAL "data")
+ list(APPEND component_libnames "icudt")
+ # Note there is no debug variant at present
+ list(APPEND component_debug_libnames "icudtd")
+ endif()
+ if(component STREQUAL "dt")
+ list(APPEND component_libnames "icudata")
+ # Note there is no debug variant at present
+ list(APPEND component_debug_libnames "icudatad")
+ endif()
+ if(component STREQUAL "i18n")
+ list(APPEND component_libnames "icuin")
+ list(APPEND component_debug_libnames "icuind")
+ endif()
+ if(component STREQUAL "in")
+ list(APPEND component_libnames "icui18n")
+ list(APPEND component_debug_libnames "icui18nd")
+ endif()
+
+ if(static_prefix)
+ unset(static_component_libnames)
+ unset(static_component_debug_libnames)
+ foreach(component_libname ${component_libnames})
+ list(APPEND static_component_libnames
+ ${static_prefix}${component_libname})
+ endforeach()
+ foreach(component_libname ${component_debug_libnames})
+ list(APPEND static_component_debug_libnames
+ ${static_prefix}${component_libname})
+ endforeach()
+ list(APPEND component_libnames ${static_component_libnames})
+ list(APPEND component_debug_libnames ${static_component_debug_libnames})
+ endif()
+ find_library("${component_cache_release}"
+ NAMES ${component_libnames}
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_library_suffixes}
+ DOC "ICU ${component} library (release)"
+ NO_PACKAGE_ROOT_PATH
+ )
+ find_library("${component_cache_debug}"
+ NAMES ${component_debug_libnames}
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_library_suffixes}
+ DOC "ICU ${component} library (debug)"
+ NO_PACKAGE_ROOT_PATH
+ )
+ include(SelectLibraryConfigurations)
+ select_library_configurations(ICU_${component_upcase})
+ mark_as_advanced("${component_cache_release}" "${component_cache_debug}")
+ if(${component_cache})
+ set("${component_found}" ON)
+ list(APPEND ICU_LIBRARY "${${component_cache}}")
+ endif()
+ mark_as_advanced("${component_found}")
+ set("${component_cache}" "${${component_cache}}" PARENT_SCOPE)
+ set("${component_found}" "${${component_found}}" PARENT_SCOPE)
+ if(${component_found})
+ if (ICU_FIND_REQUIRED_${component})
+ list(APPEND ICU_LIBS_FOUND "${component} (required)")
+ else()
+ list(APPEND ICU_LIBS_FOUND "${component} (optional)")
+ endif()
+ else()
+ if (ICU_FIND_REQUIRED_${component})
+ set(ICU_REQUIRED_LIBS_FOUND OFF)
+ list(APPEND ICU_LIBS_NOTFOUND "${component} (required)")
+ else()
+ list(APPEND ICU_LIBS_NOTFOUND "${component} (optional)")
+ endif()
+ endif()
+ endforeach()
+ set(_ICU_REQUIRED_LIBS_FOUND "${ICU_REQUIRED_LIBS_FOUND}" PARENT_SCOPE)
+ set(ICU_LIBRARY "${ICU_LIBRARY}" PARENT_SCOPE)
+
+ # Find all ICU data files
+ if(CMAKE_LIBRARY_ARCHITECTURE)
+ list(APPEND icu_data_suffixes
+ "${_lib64}/${CMAKE_LIBRARY_ARCHITECTURE}/icu/${ICU_VERSION}"
+ "lib/${CMAKE_LIBRARY_ARCHITECTURE}/icu/${ICU_VERSION}"
+ "${_lib64}/${CMAKE_LIBRARY_ARCHITECTURE}/icu"
+ "lib/${CMAKE_LIBRARY_ARCHITECTURE}/icu")
+ endif()
+ list(APPEND icu_data_suffixes
+ "${_lib64}/icu/${ICU_VERSION}"
+ "lib/icu/${ICU_VERSION}"
+ "${_lib64}/icu"
+ "lib/icu")
+ foreach(data ${icu_data})
+ string(TOUPPER "${data}" data_upcase)
+ string(REPLACE "." "_" data_upcase "${data_upcase}")
+ set(cache_var "ICU_${data_upcase}")
+ set(data_var "ICU_${data_upcase}")
+ find_file("${cache_var}"
+ NAMES "${data}"
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_data_suffixes}
+ DOC "ICU ${data} data file")
+ mark_as_advanced(cache_var)
+ set("${data_var}" "${${cache_var}}" PARENT_SCOPE)
+ endforeach()
+
+ if(NOT ICU_FIND_QUIETLY)
+ if(ICU_LIBS_FOUND)
+ message(STATUS "Found the following ICU libraries:")
+ foreach(found ${ICU_LIBS_FOUND})
+ message(STATUS " ${found}")
+ endforeach()
+ endif()
+ if(ICU_LIBS_NOTFOUND)
+ message(STATUS "The following ICU libraries were not found:")
+ foreach(notfound ${ICU_LIBS_NOTFOUND})
+ message(STATUS " ${notfound}")
+ endforeach()
+ endif()
+ endif()
+
+ if(ICU_DEBUG)
+ message(STATUS "--------FindICU.cmake search debug--------")
+ message(STATUS "ICU binary path search order: ${icu_roots}")
+ message(STATUS "ICU include path search order: ${icu_roots}")
+ message(STATUS "ICU library path search order: ${icu_roots}")
+ message(STATUS "----------------")
+ endif()
+endfunction()
+
+_ICU_FIND()
+
+include(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(ICU
+ FOUND_VAR ICU_FOUND
+ REQUIRED_VARS ICU_INCLUDE_DIR
+ ICU_LIBRARY
+ _ICU_REQUIRED_LIBS_FOUND
+ VERSION_VAR ICU_VERSION
+ FAIL_MESSAGE "Failed to find all ICU components")
+
+unset(_ICU_REQUIRED_LIBS_FOUND)
+
+if(ICU_FOUND)
+ set(ICU_INCLUDE_DIRS "${ICU_INCLUDE_DIR}")
+ set(ICU_LIBRARIES "${ICU_LIBRARY}")
+ foreach(_ICU_component ${ICU_FIND_COMPONENTS})
+ string(TOUPPER "${_ICU_component}" _ICU_component_upcase)
+ set(_ICU_component_cache "ICU_${_ICU_component_upcase}_LIBRARY")
+ set(_ICU_component_cache_release "ICU_${_ICU_component_upcase}_LIBRARY_RELEASE")
+ set(_ICU_component_cache_debug "ICU_${_ICU_component_upcase}_LIBRARY_DEBUG")
+ set(_ICU_component_lib "ICU_${_ICU_component_upcase}_LIBRARIES")
+ set(_ICU_component_found "${_ICU_component_upcase}_FOUND")
+ set(_ICU_imported_target "ICU::${_ICU_component}")
+ if(${_ICU_component_found})
+ set("${_ICU_component_lib}" "${${_ICU_component_cache}}")
+ if(NOT TARGET ${_ICU_imported_target})
+ add_library(${_ICU_imported_target} UNKNOWN IMPORTED)
+ if(ICU_INCLUDE_DIR)
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ INTERFACE_INCLUDE_DIRECTORIES "${ICU_INCLUDE_DIR}")
+ endif()
+ if(EXISTS "${${_ICU_component_cache}}")
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+ IMPORTED_LOCATION "${${_ICU_component_cache}}")
+ endif()
+ if(EXISTS "${${_ICU_component_cache_release}}")
+ set_property(TARGET ${_ICU_imported_target} APPEND PROPERTY
+ IMPORTED_CONFIGURATIONS RELEASE)
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX"
+ IMPORTED_LOCATION_RELEASE "${${_ICU_component_cache_release}}")
+ endif()
+ if(EXISTS "${${_ICU_component_cache_debug}}")
+ set_property(TARGET ${_ICU_imported_target} APPEND PROPERTY
+ IMPORTED_CONFIGURATIONS DEBUG)
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "CXX"
+ IMPORTED_LOCATION_DEBUG "${${_ICU_component_cache_debug}}")
+ endif()
+ if(CMAKE_DL_LIBS AND _ICU_component STREQUAL "uc")
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ INTERFACE_LINK_LIBRARIES "${CMAKE_DL_LIBS}")
+ endif()
+ endif()
+ endif()
+ unset(_ICU_component_upcase)
+ unset(_ICU_component_cache)
+ unset(_ICU_component_lib)
+ unset(_ICU_component_found)
+ unset(_ICU_imported_target)
+ endforeach()
+endif()
+
+if(ICU_DEBUG)
+ message(STATUS "--------FindICU.cmake results debug--------")
+ message(STATUS "ICU found: ${ICU_FOUND}")
+ message(STATUS "ICU_VERSION number: ${ICU_VERSION}")
+ message(STATUS "ICU_ROOT directory: ${ICU_ROOT}")
+ message(STATUS "ICU_INCLUDE_DIR directory: ${ICU_INCLUDE_DIR}")
+ message(STATUS "ICU_LIBRARIES: ${ICU_LIBRARIES}")
+
+ foreach(program IN LISTS icu_programs)
+ string(TOUPPER "${program}" program_upcase)
+ set(program_lib "ICU_${program_upcase}_EXECUTABLE")
+ message(STATUS "${program} program: ${${program_lib}}")
+ unset(program_upcase)
+ unset(program_lib)
+ endforeach()
+
+ foreach(data IN LISTS icu_data)
+ string(TOUPPER "${data}" data_upcase)
+ string(REPLACE "." "_" data_upcase "${data_upcase}")
+ set(data_lib "ICU_${data_upcase}")
+ message(STATUS "${data} data: ${${data_lib}}")
+ unset(data_upcase)
+ unset(data_lib)
+ endforeach()
+
+ foreach(component IN LISTS ICU_FIND_COMPONENTS)
+ string(TOUPPER "${component}" component_upcase)
+ set(component_lib "ICU_${component_upcase}_LIBRARIES")
+ set(component_found "${component_upcase}_FOUND")
+ message(STATUS "${component} library found: ${${component_found}}")
+ message(STATUS "${component} library: ${${component_lib}}")
+ unset(component_upcase)
+ unset(component_lib)
+ unset(component_found)
+ endforeach()
+ message(STATUS "----------------")
+endif()
+
+unset(icu_programs)
diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake
new file mode 100644
index 00000000000..60fbf0726a0
--- /dev/null
+++ b/cmake/FindLAPACK.cmake
@@ -0,0 +1,430 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindLAPACK
+----------
+
+Find Linear Algebra PACKage (LAPACK) library
+
+This module finds an installed fortran library that implements the
+LAPACK linear-algebra interface (see http://www.netlib.org/lapack/).
+
+The approach follows that taken for the autoconf macro file,
+``acx_lapack.m4`` (distributed at
+http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html).
+
+Input Variables
+^^^^^^^^^^^^^^^
+
+The following variables may be set to influence this module's behavior:
+
+``BLA_STATIC``
+ if ``ON`` use static linkage
+
+``BLA_VENDOR``
+ If set, checks only the specified vendor, if not set checks all the
+ possibilities. List of vendors valid in this module:
+
+ * ``Intel10_32`` (intel mkl v10 32 bit)
+ * ``Intel10_64lp`` (intel mkl v10+ 64 bit, threaded code, lp64 model)
+ * ``Intel10_64lp_seq`` (intel mkl v10+ 64 bit, sequential code, lp64 model)
+ * ``Intel10_64ilp`` (intel mkl v10+ 64 bit, threaded code, ilp64 model)
+ * ``Intel10_64ilp_seq`` (intel mkl v10+ 64 bit, sequential code, ilp64 model)
+ * ``Intel`` (obsolete versions of mkl 32 and 64 bit)
+ * ``OpenBLAS``
+ * ``FLAME``
+ * ``ACML``
+ * ``Apple``
+ * ``NAS``
+ * ``Generic``
+
+``BLA_F95``
+ if ``ON`` tries to find BLAS95/LAPACK95
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This module defines the following variables:
+
+``LAPACK_FOUND``
+ library implementing the LAPACK interface is found
+``LAPACK_LINKER_FLAGS``
+ uncached list of required linker flags (excluding -l and -L).
+``LAPACK_LIBRARIES``
+ uncached list of libraries (using full path name) to link against
+ to use LAPACK
+``LAPACK95_LIBRARIES``
+ uncached list of libraries (using full path name) to link against
+ to use LAPACK95
+``LAPACK95_FOUND``
+ library implementing the LAPACK95 interface is found
+
+.. note::
+
+ C or CXX must be enabled to use Intel MKL
+
+ For example, to use Intel MKL libraries and/or Intel compiler:
+
+ .. code-block:: cmake
+
+ set(BLA_VENDOR Intel10_64lp)
+ find_package(LAPACK)
+#]=======================================================================]
+
+set(_lapack_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
+
+# Check the language being used
+if( NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED) )
+ if(LAPACK_FIND_REQUIRED)
+ message(FATAL_ERROR "FindLAPACK requires Fortran, C, or C++ to be enabled.")
+ else()
+ message(STATUS "Looking for LAPACK... - NOT found (Unsupported languages)")
+ return()
+ endif()
+endif()
+
+if (CMAKE_Fortran_COMPILER_LOADED)
+include(CheckFortranFunctionExists)
+else ()
+include(CheckFunctionExists)
+endif ()
+include(CMakePushCheckState)
+
+cmake_push_check_state()
+set(CMAKE_REQUIRED_QUIET ${LAPACK_FIND_QUIETLY})
+
+set(LAPACK_FOUND FALSE)
+set(LAPACK95_FOUND FALSE)
+
+# TODO: move this stuff to separate module
+
+macro(Check_Lapack_Libraries LIBRARIES _prefix _name _flags _list _blas _threads)
+# This macro checks for the existence of the combination of fortran libraries
+# given by _list. If the combination is found, this macro checks (using the
+# Check_Fortran_Function_Exists macro) whether can link against that library
+# combination using the name of a routine given by _name using the linker
+# flags given by _flags. If the combination of libraries is found and passes
+# the link test, LIBRARIES is set to the list of complete library paths that
+# have been found. Otherwise, LIBRARIES is set to FALSE.
+
+# N.B. _prefix is the prefix applied to the names of all cached variables that
+# are generated internally and marked advanced by this macro.
+
+set(_libraries_work TRUE)
+set(${LIBRARIES})
+set(_combined_name)
+if (NOT _libdir)
+ if (WIN32)
+ set(_libdir ENV LIB)
+ elseif (APPLE)
+ set(_libdir ENV DYLD_LIBRARY_PATH)
+ else ()
+ set(_libdir ENV LD_LIBRARY_PATH)
+ endif ()
+endif ()
+
+list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+
+foreach(_library ${_list})
+ set(_combined_name ${_combined_name}_${_library})
+
+ if(_libraries_work)
+ if (BLA_STATIC)
+ if (WIN32)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ endif ()
+ if (APPLE)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ else ()
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ endif ()
+ else ()
+ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ # for ubuntu's libblas3gf and liblapack3gf packages
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf)
+ endif ()
+ endif ()
+ find_library(${_prefix}_${_library}_LIBRARY
+ NAMES ${_library}
+ PATHS ${_libdir}
+ )
+ mark_as_advanced(${_prefix}_${_library}_LIBRARY)
+ set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})
+ set(_libraries_work ${${_prefix}_${_library}_LIBRARY})
+ endif()
+endforeach()
+
+if(_libraries_work)
+ # Test this combination of libraries.
+ if(UNIX AND BLA_STATIC)
+ set(CMAKE_REQUIRED_LIBRARIES ${_flags} "-Wl,--start-group" ${${LIBRARIES}} ${_blas} "-Wl,--end-group" ${_threads})
+ else()
+ set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas} ${_threads})
+ endif()
+# message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}")
+ if (NOT CMAKE_Fortran_COMPILER_LOADED)
+ check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS)
+ else ()
+ check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS)
+ endif ()
+ set(CMAKE_REQUIRED_LIBRARIES)
+ set(_libraries_work ${${_prefix}${_combined_name}_WORKS})
+ #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}")
+endif()
+
+if(_libraries_work)
+ set(${LIBRARIES} ${${LIBRARIES}} ${_blas} ${_threads})
+else()
+ set(${LIBRARIES} FALSE)
+endif()
+
+endmacro()
+
+
+set(LAPACK_LINKER_FLAGS)
+set(LAPACK_LIBRARIES)
+set(LAPACK95_LIBRARIES)
+
+
+if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)
+ find_package(BLAS)
+else()
+ find_package(BLAS REQUIRED)
+endif()
+
+
+if(BLAS_FOUND)
+ set(LAPACK_LINKER_FLAGS ${BLAS_LINKER_FLAGS})
+ if (NOT $ENV{BLA_VENDOR} STREQUAL "")
+ set(BLA_VENDOR $ENV{BLA_VENDOR})
+ else ()
+ if(NOT BLA_VENDOR)
+ set(BLA_VENDOR "All")
+ endif()
+ endif ()
+
+#intel lapack
+if (BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All")
+ if (NOT WIN32)
+ set(LAPACK_mkl_LM "-lm")
+ set(LAPACK_mkl_LDL "-ldl")
+ endif ()
+ if (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)
+ if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)
+ find_PACKAGE(Threads)
+ else()
+ find_package(Threads REQUIRED)
+ endif()
+
+ if (BLA_VENDOR MATCHES "_64ilp")
+ set(LAPACK_mkl_ILP_MODE "ilp64")
+ else ()
+ set(LAPACK_mkl_ILP_MODE "lp64")
+ endif ()
+
+ set(LAPACK_SEARCH_LIBS "")
+
+ if (BLA_F95)
+ set(LAPACK_mkl_SEARCH_SYMBOL "cheev_f95")
+ set(_LIBRARIES LAPACK95_LIBRARIES)
+ set(_BLAS_LIBRARIES ${BLAS95_LIBRARIES})
+
+ # old
+ list(APPEND LAPACK_SEARCH_LIBS
+ "mkl_lapack95")
+ # new >= 10.3
+ list(APPEND LAPACK_SEARCH_LIBS
+ "mkl_intel_c")
+ list(APPEND LAPACK_SEARCH_LIBS
+ "mkl_lapack95_${LAPACK_mkl_ILP_MODE}")
+ else()
+ set(LAPACK_mkl_SEARCH_SYMBOL "cheev")
+ set(_LIBRARIES LAPACK_LIBRARIES)
+ set(_BLAS_LIBRARIES ${BLAS_LIBRARIES})
+
+ # old
+ list(APPEND LAPACK_SEARCH_LIBS
+ "mkl_lapack")
+ endif()
+
+ # First try empty lapack libs
+ if (NOT ${_LIBRARIES})
+ check_lapack_libraries(
+ ${_LIBRARIES}
+ LAPACK
+ ${LAPACK_mkl_SEARCH_SYMBOL}
+ ""
+ ""
+ "${_BLAS_LIBRARIES}"
+ ""
+ )
+ endif ()
+ # Then try the search libs
+ foreach (IT ${LAPACK_SEARCH_LIBS})
+ if (NOT ${_LIBRARIES})
+ check_lapack_libraries(
+ ${_LIBRARIES}
+ LAPACK
+ ${LAPACK_mkl_SEARCH_SYMBOL}
+ ""
+ "${IT}"
+ "${_BLAS_LIBRARIES}"
+ "${CMAKE_THREAD_LIBS_INIT};${LAPACK_mkl_LM};${LAPACK_mkl_LDL}"
+ )
+ endif ()
+ endforeach ()
+
+ unset(LAPACK_mkl_ILP_MODE)
+ unset(LAPACK_mkl_SEARCH_SYMBOL)
+ unset(LAPACK_mkl_LM)
+ unset(LAPACK_mkl_LDL)
+ endif ()
+endif()
+
+if (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All")
+ if(NOT LAPACK_LIBRARIES)
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "goto2"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All")
+ if(NOT LAPACK_LIBRARIES)
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "openblas"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "FLAME" OR BLA_VENDOR STREQUAL "All")
+ if(NOT LAPACK_LIBRARIES)
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "flame"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif()
+endif ()
+
+#acml lapack
+if (BLA_VENDOR MATCHES "ACML" OR BLA_VENDOR STREQUAL "All")
+ if (BLAS_LIBRARIES MATCHES ".+acml.+")
+ set (LAPACK_LIBRARIES ${BLAS_LIBRARIES})
+ endif ()
+endif ()
+
+# Apple LAPACK library?
+if (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All")
+ if(NOT LAPACK_LIBRARIES)
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "Accelerate"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif()
+endif ()
+if (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All")
+ if ( NOT LAPACK_LIBRARIES )
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "vecLib"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif ()
+endif ()
+# Generic LAPACK library?
+if (BLA_VENDOR STREQUAL "Generic" OR
+ BLA_VENDOR STREQUAL "ATLAS" OR
+ BLA_VENDOR STREQUAL "All")
+ if ( NOT LAPACK_LIBRARIES )
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "lapack"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif ()
+endif ()
+
+else()
+ message(STATUS "LAPACK requires BLAS")
+endif()
+
+if(BLA_F95)
+ if(LAPACK95_LIBRARIES)
+ set(LAPACK95_FOUND TRUE)
+ else()
+ set(LAPACK95_FOUND FALSE)
+ endif()
+ if(NOT LAPACK_FIND_QUIETLY)
+ if(LAPACK95_FOUND)
+ message(STATUS "A library with LAPACK95 API found.")
+ else()
+ if(LAPACK_FIND_REQUIRED)
+ message(FATAL_ERROR
+ "A required library with LAPACK95 API not found. Please specify library location."
+ )
+ else()
+ message(STATUS
+ "A library with LAPACK95 API not found. Please specify library location."
+ )
+ endif()
+ endif()
+ endif()
+ set(LAPACK_FOUND "${LAPACK95_FOUND}")
+ set(LAPACK_LIBRARIES "${LAPACK95_LIBRARIES}")
+else()
+ if(LAPACK_LIBRARIES)
+ set(LAPACK_FOUND TRUE)
+ else()
+ set(LAPACK_FOUND FALSE)
+ endif()
+
+ if(NOT LAPACK_FIND_QUIETLY)
+ if(LAPACK_FOUND)
+ message(STATUS "A library with LAPACK API found.")
+ else()
+ if(LAPACK_FIND_REQUIRED)
+ message(FATAL_ERROR
+ "A required library with LAPACK API not found. Please specify library location."
+ )
+ else()
+ message(STATUS
+ "A library with LAPACK API not found. Please specify library location."
+ )
+ endif()
+ endif()
+ endif()
+endif()
+
+cmake_pop_check_state()
+set(CMAKE_FIND_LIBRARY_SUFFIXES ${_lapack_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
diff --git a/cmake/FindNvToolExt.cmake b/cmake/FindNvToolExt.cmake
new file mode 100644
index 00000000000..5f2998e442a
--- /dev/null
+++ b/cmake/FindNvToolExt.cmake
@@ -0,0 +1,35 @@
+# The following variables are optionally searched for defaults
+# NvToolExt_ROOT_DIR:
+#
+# The following are set after configuration is done:
+# NvToolExt_FOUND
+# NvToolExt_INCLUDE_DIR
+# NvToolExt_LIBRARIES
+# NvToolExt_LIBRARY_DIR
+# NvToolExt: a target
+
+include(FindPackageHandleStandardArgs)
+
+set(NvToolExt_SEARCH_DIRS ${CUDA_TOOLKIT_ROOT_DIR})
+if(WIN32)
+ list(APPEND NvToolExt_SEARCH_DIRS "C:/Program Files/NVIDIA Corporation/NvToolsExt")
+endif()
+set(NvToolExt_SEARCH_DIRS ${NvToolExt_ROOT_DIR} ${NvToolExt_SEARCH_DIRS})
+
+
+find_path(NvToolExt_INCLUDE_DIR nvToolsExt.h HINTS ${NvToolExt_SEARCH_DIRS} PATH_SUFFIXES include)
+
+# 32bit not considered
+set(NvToolExt_LIBNAME nvToolsExt libnvToolsExt.so libnvToolsExt.a libnvToolsExt.so nvToolsExt64_1.lib)
+find_library(NvToolExt_LIBRARIES NAMES ${NvToolExt_LIBNAME} HINTS ${NvToolExt_SEARCH_DIRS}
+ PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)
+
+find_package_handle_standard_args(NvToolExt REQUIRED_VARS NvToolExt_INCLUDE_DIR NvToolExt_LIBRARIES)
+
+add_library(NvToolExt INTERFACE)
+target_include_directories(NvToolExt INTERFACE ${NvToolExt_INCLUDE_DIR})
+# target_link_directories(NvToolExt INTERFACE ${NvToolExt_INCLUDE_DIR})
+target_link_libraries(NvToolExt INTERFACE ${NvToolExt_LIBRARIES})
+
+unset(NvToolExt_SEARCH_DIRS)
+unset(NvToolExt_LIBNAME)
diff --git a/cmake/INSTALL.md b/cmake/INSTALL.md
new file mode 100644
index 00000000000..0082212eb9b
--- /dev/null
+++ b/cmake/INSTALL.md
@@ -0,0 +1,49 @@
+# Install Instruction
+
+Execute following commands in the repo root.
+
+## Build with Old Style Make Generator
+```bash
+mkdir -p build && cd build
+cmake -DCMAKE_INSTALL_PREFIX=../dist .. # configure
+cmake --build . --target install -- -j8 # build && install, substitute -j8 with /m:8 if you are on Windows
+```
+
+## Build with Ninja Generator
+``` bash
+mkdir -p build && cd build
+cmake -GNinja -DCMAKE_INSTALL_PREFIX=../dist ..
+cmake --build . --target install
+```
+
+After the build, you can find all installed files in `<kaldi_root>/dist`.
+
+# For Advanced Configuration
+
+The following options are currently available:
+
+| Variable | Available Options | Default |
+| ---------------------- | ------------------------- | -------- |
+| MATHLIB | OpenBLAS, MKL, Accelerate | OpenBLAS |
+| KALDI_BUILD_EXE | ON,OFF | ON |
+| KALDI_BUILD_TEST | ON,OFF | ON |
+| KALDI_USE_PATCH_NUMBER | ON,OFF | OFF |
+| BUILD_SHARED_LIBS | ON,OFF | OFF |
+
+Append `-D<variable>=<value>` to the configure command to use it, e.g.,
+`-DKALDI_BUILD_TEST=OFF` will disable building of test executables. For more
+information, please refer to
+[CMake Documentation](https://cmake.org/cmake/help/latest/manual/cmake.1.html).
+For quickly learning CMake usage, LLVM's short introduction will do the trick:
+[Basic CMake usage](https://llvm.org/docs/CMake.html#usage),
+[Options and variables](https://llvm.org/docs/CMake.html#options-and-variables),
+[Frequently-used CMake variables](https://llvm.org/docs/CMake.html#frequently-used-cmake-variables).
+
+NOTE 1: Currently, BUILD_SHARED_LIBS does not work on Windows because some symbols
+        (variables) are not properly exported.
+
+NOTE 2: For scripts users, since you are doing an out of source build, and the
+ install destination is at your disposal, the `$PATH` is not configured
+ properly in this case. Scripts will not work out of box. See how `$PATH`
+ is modified in [path.sh](../egs/wsj/s5/path.sh). You should add
+        `<installation_dir>/bin` to your `$PATH` before running any scripts.
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
new file mode 100644
index 00000000000..c7f45827a99
--- /dev/null
+++ b/cmake/Utils.cmake
@@ -0,0 +1,50 @@
+if(NOT CMAKE_VERSION VERSION_LESS "3.10")
+ include_guard()
+endif()
+
+# For Windows, some environment variables use backslashes in paths; converting
+# them to forward slashes fixes some nasty problems in CMake.
+macro(normalize_path in_path)
+ file(TO_CMAKE_PATH "${${in_path}}" normalize_path_out_path)
+ set(${in_path} "${normalize_path_out_path}")
+ unset(normalize_path_out_path)
+endmacro()
+
+macro(normalize_env_path in_path)
+ file(TO_CMAKE_PATH "$${in_path}" normalize_env_path_out_path)
+ set(${in_path} "${normalize_env_path_out_path}")
+ unset(normalize_env_path_out_path)
+endmacro()
+
+
+macro(add_kaldi_executable)
+ if(${KALDI_BUILD_EXE})
+ cmake_parse_arguments(kaldi_exe "" "NAME" "SOURCES;DEPENDS" ${ARGN})
+ add_executable(${kaldi_exe_NAME} ${kaldi_exe_SOURCES})
+ target_link_libraries(${kaldi_exe_NAME} PRIVATE ${kaldi_exe_DEPENDS})
+ # list(APPEND KALDI_EXECUTABLES ${kaldi_exe_NAME})
+ install(TARGETS ${kaldi_exe_NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+ unset(kaldi_exe_NAME)
+ unset(kaldi_exe_SOURCES)
+ unset(kaldi_exe_DEPENDS)
+ endif()
+endmacro()
+
+macro(add_kaldi_test_executable)
+ if(${KALDI_BUILD_TEST})
+ cmake_parse_arguments(kaldi_test_exe "" "NAME" "SOURCES;DEPENDS" ${ARGN})
+ add_executable(${kaldi_test_exe_NAME} ${kaldi_test_exe_SOURCES})
+ target_link_libraries(${kaldi_test_exe_NAME} PRIVATE ${kaldi_test_exe_DEPENDS})
+ add_test(
+ NAME ${kaldi_test_exe_NAME}
+ COMMAND ${kaldi_test_exe_NAME}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
+ # list(APPEND KALDI_TEST_EXECUTABLES ${kaldi_test_exe_NAME})
+ install(TARGETS ${kaldi_test_exe_NAME} RUNTIME DESTINATION testbin)
+
+ unset(kaldi_test_exe_NAME)
+ unset(kaldi_test_exe_SOURCES)
+ unset(kaldi_test_exe_DEPENDS)
+ endif()
+endmacro()
diff --git a/cmake/VersionHelper.cmake b/cmake/VersionHelper.cmake
new file mode 100644
index 00000000000..eb8c6acef23
--- /dev/null
+++ b/cmake/VersionHelper.cmake
@@ -0,0 +1,15 @@
+function(get_version)
+ file(READ ${CMAKE_CURRENT_SOURCE_DIR}/src/.version version)
+ string(STRIP ${version} version)
+ execute_process(COMMAND git log -n1 --format=%H src/.version
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ OUTPUT_VARIABLE version_commit
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+ execute_process(COMMAND git rev-list --count "${version_commit}..HEAD"
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ OUTPUT_VARIABLE patch_number)
+ string(STRIP ${patch_number} patch_number)
+
+ set(KALDI_VERSION ${version} PARENT_SCOPE)
+ set(KALDI_PATCH_NUMBER ${patch_number} PARENT_SCOPE)
+endfunction()
diff --git a/cmake/gen_cmake_skeleton.py b/cmake/gen_cmake_skeleton.py
new file mode 100644
index 00000000000..6492f36f7c8
--- /dev/null
+++ b/cmake/gen_cmake_skeleton.py
@@ -0,0 +1,357 @@
+import os
+import sys
+import re
+import fnmatch
+import argparse
+
+# parse early; `args` is referenced globally below
+parser = argparse.ArgumentParser()
+parser.add_argument("working_dir")
+parser.add_argument("--quiet", default=False, action="store_true")
+args = parser.parse_args()
+
+def print_wrapper(*args_, **kwargs):
+ if not args.quiet:
+ print(*args_, **kwargs)
+
+def get_subdirectories(d):
+ return [name for name in os.listdir(d) if os.path.isdir(os.path.join(d, name))]
+
+def is_bin_dir(d):
+ return d.endswith("bin")
+
+def get_files(d):
+ return [name for name in os.listdir(d) if os.path.isfile(os.path.join(d, name))]
+
+def is_header(f):
+ return f.endswith(".h")
+
+def is_cu_source(f):
+ return f.endswith(".cu")
+
+def is_test_source(f):
+ return f.endswith("-test.cc")
+
+def is_source(f):
+ return f.endswith(".cc") and not is_test_source(f)
+
+def lib_dir_name_to_lib_target(dir_name):
+ return "kaldi-" + dir_name
+
+def bin_dir_name_to_lib_target(dir_name):
+ """return the primary lib target for all executable targets in this bin dir"""
+ assert is_bin_dir(dir_name)
+ if dir_name == "bin":
+ # NOTE: "kaldi-util" might be a more strict primary lib target...
+ return "kaldi-hmm"
+ elif dir_name == "fstbin":
+ return "kaldi-fstext"
+ else:
+ return "kaldi-" + dir_name[:-3]
+
+def wrap_notwin32_condition(should_wrap, lines):
+ if isinstance(lines, str):
+ lines = [lines]
+ if should_wrap:
+ return ["if(NOT WIN32)"] + list(map(lambda l: " " + l, lines)) + ["endif()"]
+ else:
+ return lines
+
+
+def get_exe_additional_depends(t):
+ additional = {
+ # solve bin
+ "align-*": ["decoder"],
+ "compile-*graph*": ["decoder"],
+ "decode-faster": ["decoder"],
+ "latgen-faster-mapped": ["decoder"],
+ "latgen-faster-mapped-parallel": ["decoder"],
+ "latgen-incremental-mapped": ["decoder"],
+ "decode-faster-mapped": ["decoder"],
+ "sum-lda-accs": ["transform"],
+ "sum-mllt-accs": ["transform"],
+ "est-mllt": ["transform"],
+ "est-lda": ["transform"],
+ "acc-lda": ["transform"],
+ "build-pfile-from-ali": ["gmm"],
+ "make-*-transducer": ["fstext"],
+ "phones-to-prons": ["fstext"],
+
+ # solve gmmbin
+ "post-to-feats" : ["hmm"],
+ "append-post-to-feats" : ["hmm"],
+ "gmm-*": ["hmm", "transform"],
+ "gmm-latgen-*": ["decoder"],
+ "gmm-decode-*": ["decoder"],
+ "gmm-align": ["decoder"],
+ "gmm-align-compiled": ["decoder"],
+ "gmm-est-fmllr-gpost": ["sgmm2", "hmm"],
+ "gmm-rescore-lattice": ["hmm", "lat"],
+
+ # solve fstbin
+ "make-grammar-fst": ["decoder"],
+
+ # solve sgmm2bin
+ "sgmm2-*": ["hmm"],
+ "sgmm2-latgen-faster*": ["decoder"],
+ "sgmm2-align-compiled": ["decoder"],
+ "sgmm2-rescore-lattice": ["lat"],
+ "init-ubm": ["hmm"],
+
+ # solve nnetbin
+ "nnet-train-mmi-sequential": ["lat"],
+ "nnet-train-mpe-sequential": ["lat"],
+
+ # solve nnet2bin
+ "nnet-latgen-faster*": ["fstext", "decoder"],
+ "nnet-align-compiled": ["decoder"],
+ "nnet1-to-raw-nnet": ["nnet"],
+
+ # solve chainbin
+ "nnet3-chain-*": ["nnet3"],
+
+ # solve latbin
+ "lattice-compose": ["fstext"],
+ "lattice-lmrescore": ["fstext"],
+ "lattice-lmrescore-*": ["fstext", "rnnlm"],
+
+ # solve ivectorbin
+ "ivector-extract*": ["hmm"],
+
+ # solve kwsbin
+ "generate-proxy-keywords": ["fstext"],
+ "transcripts-to-fsts": ["fstext"],
+ }
+ l = []
+ for pattern in additional.keys():
+ if fnmatch.fnmatch(t, pattern):
+ l.extend(list(map(lambda name: lib_dir_name_to_lib_target(name), additional[pattern])))
+ return sorted(list(set(l)))
+
+def disable_for_win32(t):
+ disabled = [
+ "online-audio-client",
+ "online-net-client",
+ "online2-tcp-nnet3-decode-faster",
+ "online-server-gmm-decode-faster",
+ "online-audio-server-decode-faster"
+ ]
+ return t in disabled
+
+class CMakeListsHeaderLibrary(object):
+ def __init__(self, dir_name):
+ self.dir_name = dir_name
+ self.target_name = lib_dir_name_to_lib_target(self.dir_name)
+ self.header_list = []
+
+ def add_header(self, filename):
+ self.header_list.append(filename)
+
+ def add_source(self, filename):
+ pass
+
+ def add_cuda_source(self, filename):
+ pass
+
+ def add_test_source(self, filename):
+ pass
+
+ def gen_code(self):
+ ret = []
+ if len(self.header_list) > 0:
+ ret.append("set(PUBLIC_HEADERS")
+ for f in self.header_list:
+ ret.append(" " + f)
+ ret.append(")\n")
+
+ ret.append("add_library(" + self.target_name + " INTERFACE)")
+ ret.append("target_include_directories(" + self.target_name + " INTERFACE ")
+ ret.append(" $")
+ ret.append(" $")
+ ret.append(")\n")
+
+ ret.append("""
+install(TARGETS {tgt} EXPORT kaldi-targets)
+
+install(FILES ${{PUBLIC_HEADERS}} DESTINATION include/kaldi/{dir})
+""".format(tgt=self.target_name, dir=self.dir_name))
+
+ return "\n".join(ret)
+
+class CMakeListsLibrary(object):
+
+ def __init__(self, dir_name):
+ self.dir_name = dir_name
+ self.target_name = lib_dir_name_to_lib_target(self.dir_name)
+ self.header_list = []
+ self.source_list = []
+ self.cuda_source_list = []
+ self.test_source_list = []
+ self.depends = []
+
+ def add_header(self, filename):
+ self.header_list.append(filename)
+
+ def add_source(self, filename):
+ self.source_list.append(filename)
+
+ def add_cuda_source(self, filename):
+ self.cuda_source_list.append(filename)
+
+ def add_test_source(self, filename):
+ self.test_source_list.append(filename)
+
+ def load_dependency_from_makefile(self, filename):
+ with open(filename) as f:
+ makefile = f.read()
+ if "ADDLIBS" not in makefile:
+ print_wrapper("WARNING: non-standard", filename)
+ return
+ libs = makefile.split("ADDLIBS")[-1].split("\n\n")[0]
+ libs = re.findall("[^\s\\\\=]+", libs)
+ for l in libs:
+ self.depends.append(os.path.splitext(os.path.basename(l))[0])
+
+ def gen_code(self):
+ ret = []
+
+ if len(self.header_list) > 0:
+ ret.append("set(PUBLIC_HEADERS")
+ for f in self.header_list:
+ ret.append(" " + f)
+ ret.append(")\n")
+
+ if len(self.cuda_source_list) > 0:
+ self.source_list.append("${CUDA_OBJS}")
+ ret.append("if(CUDA_FOUND)")
+ ret.append(" cuda_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)")
+ ret.append(" cuda_compile(CUDA_OBJS")
+ for f in self.cuda_source_list:
+ ret.append(" " + f)
+ ret.append(" )")
+ ret.append("endif()\n")
+
+ ret.append("add_library(" + self.target_name)
+ for f in self.source_list:
+ ret.append(" " + f)
+ ret.append(")\n")
+ ret.append("target_include_directories(" + self.target_name + " PUBLIC ")
+ ret.append(" $")
+ ret.append(" $")
+ ret.append(")\n")
+
+ if len(self.depends) > 0:
+ ret.append("target_link_libraries(" + self.target_name + " PUBLIC")
+ for d in self.depends:
+ ret.append(" " + d)
+ ret.append(")\n")
+
+ def get_test_exe_name(filename):
+ exe_name = os.path.splitext(f)[0]
+ if self.dir_name.startswith("nnet") and exe_name.startswith("nnet"):
+ return self.dir_name + "-" + exe_name.split("-", 1)[1]
+ else:
+ return exe_name
+
+ if len(self.test_source_list) > 0:
+ ret.append("if(KALDI_BUILD_TEST)")
+ for f in self.test_source_list:
+ exe_target = get_test_exe_name(f)
+ depends = (self.target_name + " " + " ".join(get_exe_additional_depends(exe_target))).strip()
+ ret.extend(wrap_notwin32_condition(disable_for_win32(self.target_name),
+ " add_kaldi_test_executable(NAME " + exe_target + " SOURCES " + f + " DEPENDS " + depends + ")"))
+ ret.append("endif()")
+
+ ret.append("""
+install(TARGETS {tgt}
+ EXPORT kaldi-targets
+ ARCHIVE DESTINATION ${{CMAKE_INSTALL_LIBDIR}}
+ LIBRARY DESTINATION ${{CMAKE_INSTALL_LIBDIR}}
+ RUNTIME DESTINATION ${{CMAKE_INSTALL_BINDIR}}
+)
+
+install(FILES ${{PUBLIC_HEADERS}} DESTINATION include/kaldi/{dir})
+""".format(tgt=self.target_name, dir=self.dir_name))
+
+ return "\n".join(ret)
+
+
+
+class CMakeListsExecutable(object):
+
+ def __init__(self, dir_name, filename):
+ assert(dir_name.endswith("bin"))
+ self.list = []
+ exe_name = os.path.splitext(os.path.basename(filename))[0]
+ file_name = filename
+ depend = bin_dir_name_to_lib_target(dir_name)
+ self.list.append((exe_name, file_name, depend))
+
+ def gen_code(self):
+ ret = []
+ for exe_name, file_name, depend in self.list:
+ depends = (depend + " " + " ".join(get_exe_additional_depends(exe_name))).strip()
+ ret.extend(wrap_notwin32_condition(disable_for_win32(exe_name),
+ "add_kaldi_executable(NAME " + exe_name + " SOURCES " + file_name + " DEPENDS " + depends + ")"))
+
+ return "\n".join(ret)
+
+class CMakeListsFile(object):
+
+ GEN_CMAKE_HEADER = "# generated with cmake/gen_cmake_skeleton.py, DO NOT MODIFY.\n"
+
+ def __init__(self, directory):
+ self.path = os.path.realpath(os.path.join(directory, "CMakeLists.txt"))
+ self.sections = []
+
+ def add_section(self, section):
+ self.sections.append(section)
+
+ def write_file(self):
+ with open(self.path, "w", newline='\n') as f: # good luck for python2
+ f.write(CMakeListsFile.GEN_CMAKE_HEADER)
+ for s in self.sections:
+ code = s.gen_code()
+ f.write(code)
+ f.write("\n")
+ print_wrapper(" Writed", self.path)
+
+
+if __name__ == "__main__":
+ os.chdir(args.working_dir)
+ print_wrapper("Working in ", args.working_dir)
+
+ subdirs = get_subdirectories(".")
+ for d in subdirs:
+ if d.startswith('tfrnnlm'):
+ continue
+ cmakelists = CMakeListsFile(d)
+ if is_bin_dir(d):
+ for f in get_files(d):
+ if is_source(f):
+ dir_name = os.path.basename(d)
+ filename = os.path.basename(f)
+ exe = CMakeListsExecutable(dir_name, filename)
+ cmakelists.add_section(exe)
+ else:
+ dir_name = os.path.basename(d)
+ lib = None
+ makefile = os.path.join(d, "Makefile")
+ if not os.path.exists(makefile):
+ lib = CMakeListsHeaderLibrary(dir_name)
+ else:
+ lib = CMakeListsLibrary(dir_name)
+ lib.load_dependency_from_makefile(makefile)
+ cmakelists.add_section(lib)
+ for f in sorted(get_files(d)):
+ filename = os.path.basename(f)
+ if is_source(filename):
+ lib.add_source(filename)
+ elif is_cu_source(filename):
+ lib.add_cuda_source(filename)
+ elif is_test_source(filename):
+ lib.add_test_source(filename)
+ elif is_header(filename):
+ lib.add_header(filename)
+
+ cmakelists.write_file()
diff --git a/cmake/kaldi-config.cmake.in b/cmake/kaldi-config.cmake.in
new file mode 100644
index 00000000000..123f58c5699
--- /dev/null
+++ b/cmake/kaldi-config.cmake.in
@@ -0,0 +1,7 @@
+@PACKAGE_INIT@
+
+find_package(Threads)
+
+if(NOT TARGET kaldi-base)
+ include(${CMAKE_CURRENT_LIST_DIR}/kaldi-targets.cmake)
+endif()
diff --git a/cmake/third_party/get_third_party.cmake b/cmake/third_party/get_third_party.cmake
new file mode 100644
index 00000000000..8e24dc9f643
--- /dev/null
+++ b/cmake/third_party/get_third_party.cmake
@@ -0,0 +1,20 @@
+# Download and unpack a third-party library at configure time
+# The original code is at the README of google-test:
+# https://github.com/google/googletest/tree/master/googletest
+function(get_third_party name)
+ configure_file(
+ "${PROJECT_SOURCE_DIR}/cmake/third_party/${name}.cmake"
+ "${CMAKE_CURRENT_BINARY_DIR}/${name}-download/CMakeLists.txt")
+ execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
+ RESULT_VARIABLE result
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${name}-download")
+ if(result)
+ message(FATAL_ERROR "CMake step for ${name} failed: ${result}")
+ endif()
+ execute_process(COMMAND ${CMAKE_COMMAND} --build .
+ RESULT_VARIABLE result
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${name}-download")
+ if(result)
+ message(FATAL_ERROR "Build step for ${name} failed: ${result}")
+ endif()
+endfunction()
diff --git a/cmake/third_party/openfst.cmake b/cmake/third_party/openfst.cmake
new file mode 100644
index 00000000000..19a7f527f8f
--- /dev/null
+++ b/cmake/third_party/openfst.cmake
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 2.8.2)
+project(openfst-download NONE)
+
+include(ExternalProject)
+ExternalProject_Add(openfst
+ GIT_REPOSITORY https://github.com/kkm000/openfst
+ GIT_TAG 0bca6e76d24647427356dc242b0adbf3b5f1a8d9 # tag win/1.7.2.1
+ SOURCE_DIR "${CMAKE_BINARY_DIR}/openfst"
+ BINARY_DIR ""
+ CONFIGURE_COMMAND ""
+ BUILD_COMMAND ""
+ INSTALL_COMMAND ""
+ TEST_COMMAND ""
+)
diff --git a/cmake/third_party/openfst_lib_target.cmake b/cmake/third_party/openfst_lib_target.cmake
new file mode 100644
index 00000000000..dde5efc402a
--- /dev/null
+++ b/cmake/third_party/openfst_lib_target.cmake
@@ -0,0 +1,31 @@
+if(NOT OPENFST_ROOT_DIR)
+ message(FATAL_ERROR)
+endif()
+
+set(fst_source_dir ${OPENFST_ROOT_DIR}/src/lib)
+set(fst_include_dir ${OPENFST_ROOT_DIR}/src/include)
+
+include_directories(${fst_include_dir})
+file(GLOB fst_sources "${fst_source_dir}/*.cc")
+
+add_library(fst ${fst_sources})
+target_include_directories(fst PUBLIC
+     $<BUILD_INTERFACE:${fst_include_dir}>
+     $<INSTALL_INTERFACE:include/openfst>
+)
+
+install(TARGETS fst
+ EXPORT kaldi-targets
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+)
+
+install(DIRECTORY ${fst_include_dir}/fst
+ DESTINATION include/openfst
+ PATTERN "test/*.h" EXCLUDE
+)
+
+unset(fst_source_dir)
+unset(fst_include_dir)
+unset(fst_sources)
diff --git a/egs/aidatatang_200zh/s5/local/chain/compare_wer.sh b/egs/aidatatang_200zh/s5/local/chain/compare_wer.sh
index 71e6fbe106d..c365a8ab780 100755
--- a/egs/aidatatang_200zh/s5/local/chain/compare_wer.sh
+++ b/egs/aidatatang_200zh/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen Liu)
# compare wer between diff. models in aidatatang_200zh chain directory
diff --git a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh
index 0be0e2c79c6..9af9622d301 100644
--- a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7h.sh in swbd chain recipe.
diff --git a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh
index 78dd4000e58..0aead9a7103 100644
--- a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh
+++ b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_1a.sh.
# This setup used online pitch to train the neural network.
diff --git a/egs/aidatatang_200zh/s5/local/data_prep.sh b/egs/aidatatang_200zh/s5/local/data_prep.sh
index bb278a7d904..1e4bf127b28 100644
--- a/egs/aidatatang_200zh/s5/local/data_prep.sh
+++ b/egs/aidatatang_200zh/s5/local/data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/aidatatang_200zh/s5/local/download_and_untar.sh b/egs/aidatatang_200zh/s5/local/download_and_untar.sh
index 39f9ac01ff7..1056ead6d1a 100644
--- a/egs/aidatatang_200zh/s5/local/download_and_untar.sh
+++ b/egs/aidatatang_200zh/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/aidatatang_200zh/s5/local/format_data.sh b/egs/aidatatang_200zh/s5/local/format_data.sh
index 47af9dd9dfd..2198bae3fe7 100644
--- a/egs/aidatatang_200zh/s5/local/format_data.sh
+++ b/egs/aidatatang_200zh/s5/local/format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
. ./path.sh
diff --git a/egs/aidatatang_200zh/s5/local/nnet3/compare_wer.sh b/egs/aidatatang_200zh/s5/local/nnet3/compare_wer.sh
index 2d85626c356..35c1330aab4 100755
--- a/egs/aidatatang_200zh/s5/local/nnet3/compare_wer.sh
+++ b/egs/aidatatang_200zh/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen Liu)
# compare wer between diff. models in aidatatang_200zh nnet3 directory
diff --git a/egs/aidatatang_200zh/s5/local/nnet3/run_ivector_common.sh b/egs/aidatatang_200zh/s5/local/nnet3/run_ivector_common.sh
index 0fe55ecf000..f3ed8623495 100644
--- a/egs/aidatatang_200zh/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/aidatatang_200zh/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/aidatatang_200zh/s5/local/nnet3/tuning/run_tdnn.sh b/egs/aidatatang_200zh/s5/local/nnet3/tuning/run_tdnn.sh
index 2bcded42ed1..ca396e50542 100644
--- a/egs/aidatatang_200zh/s5/local/nnet3/tuning/run_tdnn.sh
+++ b/egs/aidatatang_200zh/s5/local/nnet3/tuning/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
diff --git a/egs/aidatatang_200zh/s5/local/prepare_dict.sh b/egs/aidatatang_200zh/s5/local/prepare_dict.sh
index aa72bcd48d2..8096c45be34 100644
--- a/egs/aidatatang_200zh/s5/local/prepare_dict.sh
+++ b/egs/aidatatang_200zh/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#Copyright 2016 LeSpeech (Author: Xingyu Na)
# prepare dictionary for aidatatang
diff --git a/egs/aidatatang_200zh/s5/local/score.sh b/egs/aidatatang_200zh/s5/local/score.sh
index a9786169973..d283ceb68dc 100644
--- a/egs/aidatatang_200zh/s5/local/score.sh
+++ b/egs/aidatatang_200zh/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/aidatatang_200zh/s5/local/train_lms.sh b/egs/aidatatang_200zh/s5/local/train_lms.sh
index bc52f8acb20..96da93d3e9f 100644
--- a/egs/aidatatang_200zh/s5/local/train_lms.sh
+++ b/egs/aidatatang_200zh/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/aidatatang_200zh/s5/run.sh b/egs/aidatatang_200zh/s5/run.sh
index 47e46a660cd..3bd20469006 100644
--- a/egs/aidatatang_200zh/s5/run.sh
+++ b/egs/aidatatang_200zh/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Beijing DataTang Tech. Co. Ltd. (Author: Liyuan Wang)
# 2017 Hui Bu
diff --git a/egs/aishell/s5/local/aishell_data_prep.sh b/egs/aishell/s5/local/aishell_data_prep.sh
index 4747e4f4d82..3be62708db2 100755
--- a/egs/aishell/s5/local/aishell_data_prep.sh
+++ b/egs/aishell/s5/local/aishell_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/aishell/s5/local/aishell_prepare_dict.sh b/egs/aishell/s5/local/aishell_prepare_dict.sh
index c4cabb24de4..28ab5e2122f 100755
--- a/egs/aishell/s5/local/aishell_prepare_dict.sh
+++ b/egs/aishell/s5/local/aishell_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/aishell/s5/local/aishell_train_lms.sh b/egs/aishell/s5/local/aishell_train_lms.sh
index 9b6cdad2960..eaca5e2fafa 100755
--- a/egs/aishell/s5/local/aishell_train_lms.sh
+++ b/egs/aishell/s5/local/aishell_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh
index b38fa4d9c7a..79b2023ab7e 100755
--- a/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7h.sh in swbd chain recipe.
diff --git a/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh
index 6b7223785d9..669a014e8cf 100755
--- a/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh
+++ b/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_1a.sh.
# This setup used online pitch to train the neural network.
diff --git a/egs/aishell/s5/local/download_and_untar.sh b/egs/aishell/s5/local/download_and_untar.sh
index 58a278241d7..9c70836bf46 100755
--- a/egs/aishell/s5/local/download_and_untar.sh
+++ b/egs/aishell/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/aishell/s5/local/nnet3/run_ivector_common.sh b/egs/aishell/s5/local/nnet3/run_ivector_common.sh
index af0ae122372..8f73deb145b 100755
--- a/egs/aishell/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/aishell/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh b/egs/aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
index 3cb8cd861a3..db434b2b24b 100755
--- a/egs/aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on swbd/s5c/local/nnet3/run_tdnn.sh
diff --git a/egs/aishell/s5/local/nnet3/tuning/run_tdnn_2a.sh b/egs/aishell/s5/local/nnet3/tuning/run_tdnn_2a.sh
index 603149585f2..a5b129be31c 100755
--- a/egs/aishell/s5/local/nnet3/tuning/run_tdnn_2a.sh
+++ b/egs/aishell/s5/local/nnet3/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
diff --git a/egs/aishell/s5/local/score.sh b/egs/aishell/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/aishell/s5/local/score.sh
+++ b/egs/aishell/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/aishell/s5/run.sh b/egs/aishell/s5/run.sh
index a99cb51c656..66c85a3f82d 100755
--- a/egs/aishell/s5/run.sh
+++ b/egs/aishell/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Beijing Shell Shell Tech. Co. Ltd. (Authors: Hui Bu)
# 2017 Jiayu Du
@@ -141,5 +141,6 @@ local/chain/run_tdnn.sh
# getting results (see RESULTS file)
for x in exp/*/decode_test; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done 2>/dev/null
+for x in exp/*/*/decode_test; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done 2>/dev/null
exit 0;
diff --git a/egs/aishell/v1/local/aishell_data_prep.sh b/egs/aishell/v1/local/aishell_data_prep.sh
index 11d131dcdb1..022276cf2b6 100755
--- a/egs/aishell/v1/local/aishell_data_prep.sh
+++ b/egs/aishell/v1/local/aishell_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/aishell/v1/local/download_and_untar.sh b/egs/aishell/v1/local/download_and_untar.sh
index 3578a1c0835..b0636a8cd86 100755
--- a/egs/aishell/v1/local/download_and_untar.sh
+++ b/egs/aishell/v1/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/aishell/v1/run.sh b/egs/aishell/v1/run.sh
index 0aaa6d493d6..b16939bd37a 100755
--- a/egs/aishell/v1/run.sh
+++ b/egs/aishell/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Beijing Shell Shell Tech. Co. Ltd. (Authors: Hui Bu)
# 2017 Jiayu Du
# 2017 Chao Li
diff --git a/egs/aishell2/s5/local/chain/compare_wer.sh b/egs/aishell2/s5/local/chain/compare_wer.sh
index c66a861c3f3..e5730df9848 100755
--- a/egs/aishell2/s5/local/chain/compare_wer.sh
+++ b/egs/aishell2/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen LIU)
# Apache 2.0
diff --git a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh
index 86c9becac5b..c1cc56ea3c6 100755
--- a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the original baseline scripts, which is supposed to be deprecated.
diff --git a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh
index d8560e63909..f1bfaf8d373 100755
--- a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# _1b is as _1a, but with pitch feats, i-vector and dropout schedule added, referenced from wsj
diff --git a/egs/aishell2/s5/local/nnet3/compare_wer.sh b/egs/aishell2/s5/local/nnet3/compare_wer.sh
index 84dda2fda14..66c1f640704 100755
--- a/egs/aishell2/s5/local/nnet3/compare_wer.sh
+++ b/egs/aishell2/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen LIU)
# Apache 2.0
diff --git a/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1a.sh b/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1a.sh
index 34ca1f0f224..130aee3cb5e 100755
--- a/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on swbd/s5c/local/nnet3/run_tdnn.sh
diff --git a/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1b.sh b/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1b.sh
index ea3a59e90ee..a6fa46f1444 100755
--- a/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1b.sh
+++ b/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_1a.sh, but with pitch features applied
diff --git a/egs/aishell2/s5/local/prepare_all.sh b/egs/aishell2/s5/local/prepare_all.sh
index 3928eb95ca3..b9b9bb271ec 100755
--- a/egs/aishell2/s5/local/prepare_all.sh
+++ b/egs/aishell2/s5/local/prepare_all.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
diff --git a/egs/aishell2/s5/local/prepare_data.sh b/egs/aishell2/s5/local/prepare_data.sh
index 4be9664ac31..6e0538155bb 100755
--- a/egs/aishell2/s5/local/prepare_data.sh
+++ b/egs/aishell2/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
# Apache 2.0
diff --git a/egs/aishell2/s5/local/prepare_dict.sh b/egs/aishell2/s5/local/prepare_dict.sh
index 56ab885ae94..9df3d73f972 100755
--- a/egs/aishell2/s5/local/prepare_dict.sh
+++ b/egs/aishell2/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
# Apache 2.0
diff --git a/egs/aishell2/s5/local/run_gmm.sh b/egs/aishell2/s5/local/run_gmm.sh
index 569e5ab570a..f32dde55348 100755
--- a/egs/aishell2/s5/local/run_gmm.sh
+++ b/egs/aishell2/s5/local/run_gmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
# 2018 Emotech LTD (Author: Xuechen LIU)
diff --git a/egs/aishell2/s5/local/score.sh b/egs/aishell2/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/aishell2/s5/local/score.sh
+++ b/egs/aishell2/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/aishell2/s5/local/train_lms.sh b/egs/aishell2/s5/local/train_lms.sh
index 179a7b78e14..0efeb2d2fd4 100755
--- a/egs/aishell2/s5/local/train_lms.sh
+++ b/egs/aishell2/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
# Apache 2.0
diff --git a/egs/aishell2/s5/run.sh b/egs/aishell2/s5/run.sh
index 8afdd3ed310..ffa4268081b 100755
--- a/egs/aishell2/s5/run.sh
+++ b/egs/aishell2/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
diff --git a/egs/ami/s5/local/ami_beamform.sh b/egs/ami/s5/local/ami_beamform.sh
index b5ff8c23ba8..dd4cf22ac7b 100755
--- a/egs/ami/s5/local/ami_beamform.sh
+++ b/egs/ami/s5/local/ami_beamform.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# Apache 2.0
diff --git a/egs/ami/s5/local/ami_download.sh b/egs/ami/s5/local/ami_download.sh
index cba130c8467..8c48a16ae7a 100755
--- a/egs/ami/s5/local/ami_download.sh
+++ b/egs/ami/s5/local/ami_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski, Jonathan Kilgour)
# Copyright 2015, Brno University of Technology (Author: Karel Vesely)
@@ -56,7 +56,7 @@ wgetfile=$wdir/wget_$mic.sh
manifest="wget --continue -O $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-0153-Tue-Oct-2-2018.manifest.txt"
-echo "#!/bin/bash" > $wgetfile
+echo "#!/usr/bin/env bash" > $wgetfile
echo $manifest >> $wgetfile
while read line; do
diff --git a/egs/ami/s5/local/ami_format_data.sh b/egs/ami/s5/local/ami_format_data.sh
index b69583850ab..c8c5ff0946d 100755
--- a/egs/ami/s5/local/ami_format_data.sh
+++ b/egs/ami/s5/local/ami_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/ami/s5/local/ami_ihm_data_prep.sh b/egs/ami/s5/local/ami_ihm_data_prep.sh
index 16949aef9b8..1a31ee0c233 100755
--- a/egs/ami/s5/local/ami_ihm_data_prep.sh
+++ b/egs/ami/s5/local/ami_ihm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus training data preparation
diff --git a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh
index 7112e0259a0..b8f9614c907 100755
--- a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh
+++ b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_mdm_data_prep.sh b/egs/ami/s5/local/ami_mdm_data_prep.sh
index 22cebd1ea11..427bf49cd0b 100755
--- a/egs/ami/s5/local/ami_mdm_data_prep.sh
+++ b/egs/ami/s5/local/ami_mdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh
index 9c4b55308f2..c05e80169c2 100755
--- a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh
+++ b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_prepare_dict.sh b/egs/ami/s5/local/ami_prepare_dict.sh
index 1834cfd112b..26f75e83e1d 100755
--- a/egs/ami/s5/local/ami_prepare_dict.sh
+++ b/egs/ami/s5/local/ami_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#adapted from fisher dict preparation script, Author: Pawel Swietojanski
diff --git a/egs/ami/s5/local/ami_sdm_data_prep.sh b/egs/ami/s5/local/ami_sdm_data_prep.sh
index ea92055e089..055dd61aaa2 100755
--- a/egs/ami/s5/local/ami_sdm_data_prep.sh
+++ b/egs/ami/s5/local/ami_sdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh
index 815e1b2d270..ec6b7933df7 100755
--- a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh
+++ b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_text_prep.sh b/egs/ami/s5/local/ami_text_prep.sh
index 777c3d8b086..eace6dfc1c7 100755
--- a/egs/ami/s5/local/ami_text_prep.sh
+++ b/egs/ami/s5/local/ami_text_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Brno University of Technology (Author: Karel Vesely)
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski), 2014, Apache 2.0
diff --git a/egs/ami/s5/local/ami_train_lms.sh b/egs/ami/s5/local/ami_train_lms.sh
index 493a3edb5da..54ad87880a4 100755
--- a/egs/ami/s5/local/ami_train_lms.sh
+++ b/egs/ami/s5/local/ami_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal, Pawel Swietojanski
diff --git a/egs/ami/s5/local/ami_xml2text.sh b/egs/ami/s5/local/ami_xml2text.sh
index c4b90a33702..6ccf28c12b8 100755
--- a/egs/ami/s5/local/ami_xml2text.sh
+++ b/egs/ami/s5/local/ami_xml2text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright, University of Edinburgh (Pawel Swietojanski and Jonathan Kilgour)
diff --git a/egs/ami/s5/local/beamformit.sh b/egs/ami/s5/local/beamformit.sh
index f50716d8872..563b303ecc0 100755
--- a/egs/ami/s5/local/beamformit.sh
+++ b/egs/ami/s5/local/beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5/local/chain/run_blstm_ami_5.sh b/egs/ami/s5/local/chain/run_blstm_ami_5.sh
index 53221a2bd53..aade87d9a61 100755
--- a/egs/ami/s5/local/chain/run_blstm_ami_5.sh
+++ b/egs/ami/s5/local/chain/run_blstm_ami_5.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###
diff --git a/egs/ami/s5/local/chain/run_chain_common.sh b/egs/ami/s5/local/chain/run_chain_common.sh
index a7ed2a8dbab..f74ba71e2a0 100755
--- a/egs/ami/s5/local/chain/run_chain_common.sh
+++ b/egs/ami/s5/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across AMI chain recipes
set -e
diff --git a/egs/ami/s5/local/chain/run_tdnn_ami_5.sh b/egs/ami/s5/local/chain/run_tdnn_ami_5.sh
index df635316127..b63987d1534 100755
--- a/egs/ami/s5/local/chain/run_tdnn_ami_5.sh
+++ b/egs/ami/s5/local/chain/run_tdnn_ami_5.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#adapted from swbd's local/chain/6z.sh script. We change the TDNN config
# These are the other modifications:
diff --git a/egs/ami/s5/local/confidence_calibration.sh b/egs/ami/s5/local/confidence_calibration.sh
index d1217afe0d0..87be0061803 100755
--- a/egs/ami/s5/local/confidence_calibration.sh
+++ b/egs/ami/s5/local/confidence_calibration.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/ami/s5/local/nnet/prepare_ivectors.sh b/egs/ami/s5/local/nnet/prepare_ivectors.sh
index 5be120d600e..8b62bcc11bb 100755
--- a/egs/ami/s5/local/nnet/prepare_ivectors.sh
+++ b/egs/ami/s5/local/nnet/prepare_ivectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016, Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/ami/s5/local/nnet3/prepare_parallel_datadirs.sh b/egs/ami/s5/local/nnet3/prepare_parallel_datadirs.sh
index cbf47682b1e..189f1b69cd8 100755
--- a/egs/ami/s5/local/nnet3/prepare_parallel_datadirs.sh
+++ b/egs/ami/s5/local/nnet3/prepare_parallel_datadirs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script creates a new data directory data/$new_mic
# where the train, dev and eval directories are copied from $original_mic
diff --git a/egs/ami/s5/local/nnet3/prepare_parallel_perturbed_alignments.sh b/egs/ami/s5/local/nnet3/prepare_parallel_perturbed_alignments.sh
index 458d31c200a..520a2bc9d84 100755
--- a/egs/ami/s5/local/nnet3/prepare_parallel_perturbed_alignments.sh
+++ b/egs/ami/s5/local/nnet3/prepare_parallel_perturbed_alignments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script creates the parallel data dir based on ihm data,
# creates speed perturbed versions of this parallel data
diff --git a/egs/ami/s5/local/nnet3/prepare_perturbed_alignments.sh b/egs/ami/s5/local/nnet3/prepare_perturbed_alignments.sh
index 70c429041ca..f9fe0a85ab3 100755
--- a/egs/ami/s5/local/nnet3/prepare_perturbed_alignments.sh
+++ b/egs/ami/s5/local/nnet3/prepare_perturbed_alignments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script creates speed perturbed versions of the training data
# and generates the corresponding alignments
diff --git a/egs/ami/s5/local/nnet3/run_ivector_common.sh b/egs/ami/s5/local/nnet3/run_ivector_common.sh
index 649f87f33d8..6eedd3df00d 100755
--- a/egs/ami/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/ami/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script contains some common (shared) parts of the run_nnet*.sh scripts.
# speed perturbation is done for the training data
diff --git a/egs/ami/s5/local/nnet3/run_lstm.sh b/egs/ami/s5/local/nnet3/run_lstm.sh
index b920482252a..d0b3aec1a3c 100755
--- a/egs/ami/s5/local/nnet3/run_lstm.sh
+++ b/egs/ami/s5/local/nnet3/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (Author: Daniel Povey).
# 2015 Vijayaditya Peddinti
diff --git a/egs/ami/s5/local/nnet3/run_tdnn.sh b/egs/ami/s5/local/nnet3/run_tdnn.sh
index 2175d3bcc66..6c2dd913670 100755
--- a/egs/ami/s5/local/nnet3/run_tdnn.sh
+++ b/egs/ami/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the standard "tdnn" system, built in nnet3; it's what we use to
# call multi-splice.
diff --git a/egs/ami/s5/local/online/run_nnet2_common.sh b/egs/ami/s5/local/online/run_nnet2_common.sh
index d03c491f805..ccaeb024e36 100755
--- a/egs/ami/s5/local/online/run_nnet2_common.sh
+++ b/egs/ami/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script contains some common (shared) parts of the run_nnet*.sh scripts.
diff --git a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh
index a6c2d02b7af..0f229fb5e7e 100755
--- a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh
+++ b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (author: Daniel Povey)
# 2014 Tom Ko
diff --git a/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh b/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh
index 9b8d7effd95..7811ef889f6 100755
--- a/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh
+++ b/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script does discriminative training on top of the online, multi-splice
diff --git a/egs/ami/s5/local/score.sh b/egs/ami/s5/local/score.sh
index 6a077c39644..9819a0f56cc 100755
--- a/egs/ami/s5/local/score.sh
+++ b/egs/ami/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012
# Copyright University of Edinburgh (Author: Pawel Swietojanski) 2014
diff --git a/egs/ami/s5/local/score_asclite.sh b/egs/ami/s5/local/score_asclite.sh
index 741591005a5..4c937a94ef2 100755
--- a/egs/ami/s5/local/score_asclite.sh
+++ b/egs/ami/s5/local/score_asclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# 2014, University of Edinburgh, (Author: Pawel Swietojanski)
# 2015, Brno University of Technology (Author: Karel Vesely)
diff --git a/egs/ami/s5/local/tfrnnlm/rnnlm_data_prep.sh b/egs/ami/s5/local/tfrnnlm/rnnlm_data_prep.sh
index 3456a77ca55..de5fa8ee7ff 100755
--- a/egs/ami/s5/local/tfrnnlm/rnnlm_data_prep.sh
+++ b/egs/ami/s5/local/tfrnnlm/rnnlm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script prepares the data directory used for TensorFlow based RNNLM traiing
# it prepares the following files in the output-directory
diff --git a/egs/ami/s5/local/tfrnnlm/run_lstm.sh b/egs/ami/s5/local/tfrnnlm/run_lstm.sh
index d68fadb10f3..58986991271 100755
--- a/egs/ami/s5/local/tfrnnlm/run_lstm.sh
+++ b/egs/ami/s5/local/tfrnnlm/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
mic=ihm
ngram_order=4 # this option when used, the rescoring binary makes an approximation
# to merge the states of the FST generated from RNNLM. e.g. if ngram-order = 4
diff --git a/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh b/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
index 4cc71b55b5c..ae4f26e9cc4 100755
--- a/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
+++ b/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
mic=ihm
ngram_order=3 # this option when used, the rescoring binary makes an approximation
# to merge the states of the FST generated from RNNLM. e.g. if ngram-order = 4
diff --git a/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh b/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh
index 7a95f38ba1e..32b6e0ae2c7 100755
--- a/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh
+++ b/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
mic=ihm
ngram_order=4 # this option when used, the rescoring binary makes an approximation
# to merge the states of the FST generated from RNNLM. e.g. if ngram-order = 4
diff --git a/egs/ami/s5b/local/ami_beamform.sh b/egs/ami/s5b/local/ami_beamform.sh
index 3397bcf2ab0..ea8ec02af52 100755
--- a/egs/ami/s5b/local/ami_beamform.sh
+++ b/egs/ami/s5b/local/ami_beamform.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# Apache 2.0
diff --git a/egs/ami/s5b/local/ami_download.sh b/egs/ami/s5b/local/ami_download.sh
index ef7b684df2b..bae72d1716a 100755
--- a/egs/ami/s5b/local/ami_download.sh
+++ b/egs/ami/s5b/local/ami_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski, Jonathan Kilgour)
# 2015 Brno University of Technology (Author: Karel Vesely)
@@ -59,7 +59,7 @@ wgetfile=$wdir/wget_$mic.sh
manifest="wget --continue -O $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-04237-Sun-Jun-15-2014.manifest.txt"
license="wget --continue -O $adir/LICENCE.TXT http://groups.inf.ed.ac.uk/ami/download/temp/Creative-Commons-Attribution-NonCommercial-ShareAlike-2.5.txt"
-echo "#!/bin/bash" > $wgetfile
+echo "#!/usr/bin/env bash" > $wgetfile
echo $manifest >> $wgetfile
echo $license >> $wgetfile
while read line; do
diff --git a/egs/ami/s5b/local/ami_format_data.sh b/egs/ami/s5b/local/ami_format_data.sh
index b69583850ab..c8c5ff0946d 100755
--- a/egs/ami/s5b/local/ami_format_data.sh
+++ b/egs/ami/s5b/local/ami_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/ami/s5b/local/ami_ihm_data_prep.sh b/egs/ami/s5b/local/ami_ihm_data_prep.sh
index 8ffa1f1e9c5..04cc6a4a68e 100755
--- a/egs/ami/s5b/local/ami_ihm_data_prep.sh
+++ b/egs/ami/s5b/local/ami_ihm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
# 2016 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh
index c54876331f1..2fe5a9db33d 100755
--- a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh
+++ b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5b/local/ami_mdm_data_prep.sh b/egs/ami/s5b/local/ami_mdm_data_prep.sh
index d100347a356..a7a0d4fbb31 100755
--- a/egs/ami/s5b/local/ami_mdm_data_prep.sh
+++ b/egs/ami/s5b/local/ami_mdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh
index 475ef5405ba..051079b0c1d 100755
--- a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh
+++ b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
# 2016 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/ami/s5b/local/ami_prepare_dict.sh b/egs/ami/s5b/local/ami_prepare_dict.sh
index 1834cfd112b..26f75e83e1d 100755
--- a/egs/ami/s5b/local/ami_prepare_dict.sh
+++ b/egs/ami/s5b/local/ami_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#adapted from fisher dict preparation script, Author: Pawel Swietojanski
diff --git a/egs/ami/s5b/local/ami_sdm_data_prep.sh b/egs/ami/s5b/local/ami_sdm_data_prep.sh
index 327595070a6..9099b3d9a9e 100755
--- a/egs/ami/s5b/local/ami_sdm_data_prep.sh
+++ b/egs/ami/s5b/local/ami_sdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh
index 580880818fc..d0711b9d71e 100755
--- a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh
+++ b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# 2016 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/ami/s5b/local/ami_text_prep.sh b/egs/ami/s5b/local/ami_text_prep.sh
index 9170c6729ea..3dbe37a8f4e 100755
--- a/egs/ami/s5b/local/ami_text_prep.sh
+++ b/egs/ami/s5b/local/ami_text_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Brno University of Technology (Author: Karel Vesely)
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski), 2014, Apache 2.0
diff --git a/egs/ami/s5b/local/ami_train_lms.sh b/egs/ami/s5b/local/ami_train_lms.sh
index 104b4ac5dd8..652eb37d20f 100755
--- a/egs/ami/s5b/local/ami_train_lms.sh
+++ b/egs/ami/s5b/local/ami_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal, Pawel Swietojanski
diff --git a/egs/ami/s5b/local/ami_xml2text.sh b/egs/ami/s5b/local/ami_xml2text.sh
index c4b90a33702..6ccf28c12b8 100755
--- a/egs/ami/s5b/local/ami_xml2text.sh
+++ b/egs/ami/s5b/local/ami_xml2text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright, University of Edinburgh (Pawel Swietojanski and Jonathan Kilgour)
diff --git a/egs/ami/s5b/local/beamformit.sh b/egs/ami/s5b/local/beamformit.sh
index f50716d8872..563b303ecc0 100755
--- a/egs/ami/s5b/local/beamformit.sh
+++ b/egs/ami/s5b/local/beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5b/local/chain/compare_wer_general.sh b/egs/ami/s5b/local/chain/compare_wer_general.sh
index 73118bf198d..808b26d0fd0 100755
--- a/egs/ami/s5b/local/chain/compare_wer_general.sh
+++ b/egs/ami/s5b/local/chain/compare_wer_general.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
mic=$1;
shift;
diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh
index 4d260e3c517..586398ce085 100755
--- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh
+++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on swbd 7q TDNN-F recipe
# with resnet-style skip connections, more layers,
diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh
index 3546b6a7ced..f2ab59abf86 100755
--- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a chain-training script with TDNN+LSTM neural networks.
# This script is based on local/chain/tuning/run_tdnn_lstm_1i.sh, but adding
diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh
index 1a839b045bd..f5190f2026b 100755
--- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a chain-training script with TDNN+LSTM neural networks.
# This script is similar to local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh,
diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
index d926c1dc6d7..8f656fa6b82 100644
--- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# cnn_tdnn_lstm_1a is based on tdnn_lstm_1j, but adding the cnn front end, and
# replacing all renorm in tdnn layers with batchnorm
diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh
index d9cd1c356e8..d9c4620f27b 100644
--- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# cnn_tdnn_lstm_1b is based on cnn_tdnn_lstm_1a, but adding dropout and
# proportional-shrink with value 5
diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh
index a0805b4f9f1..5b4cc5b3d4a 100755
--- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# cnn_tdnn_lstm_1c is based on cnn_tdnn_lstm_1b, but using smaller dropout-schedule
# and larger decay-time option(40).
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh
index 03ebc5845e4..6bb2698acb9 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a chain-training script with TDNN neural networks.
# Please see RESULTS_* for examples of command lines invoking this script.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh
index 997357b80a9..4e2fb5d3070 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a chain-training script with TDNN neural networks.
# Please see RESULTS_* for examples of command lines invoking this script.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh
index 4d062e65429..6ada60c7047 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1b but with shorter minibatches
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
index 387570388d0..8d3f1ab95d7 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1b but uses PCA instead of
# LDA features for the ivector extractor.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh
index 0436b08cdc0..b8497c809c4 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1b but uses batchnorm components instead of renorm
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
index 4ca526d63b8..3e041d4a4cc 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1e but uses batchnorm components instead of renorm also adding
# proportional-shrink 10, trained with 4 epochs
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
index baed760bb68..dcf1d7b03a4 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1e but uses batchnorm components instead of renorm also adding
# proportional-shrink 10, trained with 6 epochs
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh
index e721a858c0a..aca5b26b69b 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1g but adding two non-splicing layers towards the beginning
# of the network, trained with 9 epochs.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
index de40cb2d1a4..89390de6690 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1h but replacing proportional-shrink with l2-regularize.
# The results match those from 1h.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
index 80b2aee60e9..c6a12b1f4f9 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1j is same as swbd 7q. It uses modified topology with resnet-style skip connections, more layers,
# skinnier bottlenecks.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
index 4f580b88f6b..f6c65cc0826 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# TDNN+LSTM architecture similar to swbd/tdnn_lstm_1b
# results on sdm1 with ihm ali
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh
index 904a079d7de..3576179baa9 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1a but the neural network has two more TDNN layers (0,3 0,3)
# above the lstm
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh
index 511e520465a..dc2705ca577 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1a, but with more TDNN layers between each LSTM
# results on sdm1 with ihm ali
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh
index bd81b7df4eb..0a30490c9e7 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1c, but with more TDNN layers between each LSTM
# results on sdm1 with ihm ali
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh
index 50903e78b6d..f41b83fd448 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1c but with only right context for the TDNNs i.e., (0,3) in place
# of (-3,0,3)
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh
index f6c53001498..78653cd867b 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1a but the neural network has two more TDNN layers (0,3 0,3)
# above the lstm
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh
index 79fd9ef3fb5..8cd21a28715 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1c but with smaller minibatch
# using smaller minibatches seems to be better in TDNN+LSTM archs.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh
index e58a7f89e03..e0ac6fc7e8f 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1c but with one more stack of TDNN and LSTM layers
# results on sdm1 using ihm ali
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh
index 13f894f5a48..b567bcb4527 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1g but with TDNN output dim 1024 instead of 512
# (num-params 1g:21309812 1i: 43447156)
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh
index 48b31832e8c..806b305a847 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1j is same as 1i but with changes related to fast-lstmp layer
# changed num-chunk-per-minibatch to be variable
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh
index e675bc494bb..63430d903c1 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1k is same as 1j but with smaller delay on the first lstm layer
# there is a 37% increase in training time 11hrs vs 8hrs and the gains are modest
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh
index 2d019398274..5ab8333b043 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This (1l.sh) is the same as 1i but with per-frame dropout on LSTM layer
# It is a regular (non-fast) LSTM with per-frame dropout on [i, f, o] gates of the LSTM,
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh
index 9e5b971bbe2..a86bab5055a 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This (1m.sh) is the same as 1j but with per-frame dropout on LSTM layer
# It is a fast LSTM with per-frame dropout on [i, f, o] gates of the LSTM,
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
index 9575c3cf686..ab3354675e1 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1i but with batchnorm replacing all renorm in TDNN
# and using proportional-shrink with value 10, this model uses
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh
index a7f2625c181..c260601be75 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1n but replacing proportional-shrink with l2-regularize.
# Also applied similar changes from 1i to 1j:
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh
index ca920869b30..58c11f4238e 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as tdnn_lstm_1o but use backstitch training.
# Also num-epochs and l2-regularize are tuned for best performance.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh
index 53dbd5238db..9fd2006aa03 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh
index dafef668e60..3948cf39566 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh
index 677946d0b9a..5bc025d90ef 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/ami/s5b/local/nnet3/multi_condition/run_ivector_common.sh b/egs/ami/s5b/local/nnet3/multi_condition/run_ivector_common.sh
index 5ba35fa421c..1112a7a2968 100755
--- a/egs/ami/s5b/local/nnet3/multi_condition/run_ivector_common.sh
+++ b/egs/ami/s5b/local/nnet3/multi_condition/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/ami/s5b/local/nnet3/prepare_lores_feats.sh b/egs/ami/s5b/local/nnet3/prepare_lores_feats.sh
index efa0046bd62..b98abe32eca 100755
--- a/egs/ami/s5b/local/nnet3/prepare_lores_feats.sh
+++ b/egs/ami/s5b/local/nnet3/prepare_lores_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/ami/s5b/local/nnet3/run_ivector_common.sh b/egs/ami/s5b/local/nnet3/run_ivector_common.sh
index e67d1039c40..7da982d49f9 100755
--- a/egs/ami/s5b/local/nnet3/run_ivector_common.sh
+++ b/egs/ami/s5b/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/ami/s5b/local/nnet3/run_lstm.sh b/egs/ami/s5b/local/nnet3/run_lstm.sh
index c5583e2d0ef..9b544706d36 100755
--- a/egs/ami/s5b/local/nnet3/run_lstm.sh
+++ b/egs/ami/s5b/local/nnet3/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the standard "lstm" system, built in nnet3.
# Please see RESULTS_* for examples of command lines invoking this script.
diff --git a/egs/ami/s5b/local/nnet3/run_tdnn.sh b/egs/ami/s5b/local/nnet3/run_tdnn.sh
index cc6b60696b1..a2af870c8a1 100755
--- a/egs/ami/s5b/local/nnet3/run_tdnn.sh
+++ b/egs/ami/s5b/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the standard "tdnn" system, built in nnet3.
# Please see RESULTS_* for examples of command lines invoking this script.
diff --git a/egs/ami/s5b/local/prepare_parallel_train_data.sh b/egs/ami/s5b/local/prepare_parallel_train_data.sh
index ad22ad7cf22..63b303d0a85 100755
--- a/egs/ami/s5b/local/prepare_parallel_train_data.sh
+++ b/egs/ami/s5b/local/prepare_parallel_train_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script creates a new data directory data/sdm1/train_cleanali or
# data/mdm8/train_cleanali which has the segment ids from (e.g.) data/sdm1/train
diff --git a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1a.sh b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
index 0c38955cc32..3b09e3dfbde 100755
--- a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
+++ b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1b.sh b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
index eca8421b0f2..e892bb483f9 100755
--- a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
+++ b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
index 00a6edb8125..769d1e00bc9 100755
--- a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
+++ b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/ami/s5b/local/run_cleanup_segmentation.sh b/egs/ami/s5b/local/run_cleanup_segmentation.sh
index e2f0b0516ce..81d1fce9721 100755
--- a/egs/ami/s5b/local/run_cleanup_segmentation.sh
+++ b/egs/ami/s5b/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/ami/s5b/local/score.sh b/egs/ami/s5b/local/score.sh
index 6a077c39644..9819a0f56cc 100755
--- a/egs/ami/s5b/local/score.sh
+++ b/egs/ami/s5b/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012
# Copyright University of Edinburgh (Author: Pawel Swietojanski) 2014
diff --git a/egs/ami/s5b/local/score_asclite.sh b/egs/ami/s5b/local/score_asclite.sh
index 7327f6246af..ad6243a6176 100755
--- a/egs/ami/s5b/local/score_asclite.sh
+++ b/egs/ami/s5b/local/score_asclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# 2014, University of Edinburgh, (Author: Pawel Swietojanski)
# 2015, Brno University of Technology (Author: Karel Vesely)
diff --git a/egs/ami/s5b/run.sh b/egs/ami/s5b/run.sh
index eacc69a6845..79989f17004 100755
--- a/egs/ami/s5b/run.sh
+++ b/egs/ami/s5b/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/an4/s5/local/download_and_untar.sh b/egs/an4/s5/local/download_and_untar.sh
index 81919284da7..ec55749768d 100755
--- a/egs/an4/s5/local/download_and_untar.sh
+++ b/egs/an4/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/an4/s5/run.sh b/egs/an4/s5/run.sh
index eef699edef7..dd9bbe03732 100755
--- a/egs/an4/s5/run.sh
+++ b/egs/an4/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
diff --git a/egs/apiai_decode/s5/download-model.sh b/egs/apiai_decode/s5/download-model.sh
index 11a00cb0979..4af6ad58b3d 100755
--- a/egs/apiai_decode/s5/download-model.sh
+++ b/egs/apiai_decode/s5/download-model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Downlaods Api.ai chain model into exp/api.ai-model/ (will replace one if exists)
DOWNLOAD_URL="https://github.com/api-ai/api-ai-english-asr-model/releases/download/1.0/api.ai-kaldi-asr-model.zip"
diff --git a/egs/apiai_decode/s5/local/create-corpus.sh b/egs/apiai_decode/s5/local/create-corpus.sh
index 8071aa226de..8f023d842b7 100755
--- a/egs/apiai_decode/s5/local/create-corpus.sh
+++ b/egs/apiai_decode/s5/local/create-corpus.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Checking arguments
if [ $# -le 1 ]; then
diff --git a/egs/apiai_decode/s5/recognize-wav.sh b/egs/apiai_decode/s5/recognize-wav.sh
index d76b6293642..c2049bcdb11 100755
--- a/egs/apiai_decode/s5/recognize-wav.sh
+++ b/egs/apiai_decode/s5/recognize-wav.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Api.ai (Author: Ilya Platonov)
# Apache 2.0
diff --git a/egs/aspire/s5/local/build_silprob.sh b/egs/aspire/s5/local/build_silprob.sh
index fbba50990c6..d4367f7f0ed 100755
--- a/egs/aspire/s5/local/build_silprob.sh
+++ b/egs/aspire/s5/local/build_silprob.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/chain/compare_wer_general.sh b/egs/aspire/s5/local/chain/compare_wer_general.sh
index 7b85dc373e0..73627bd585c 100755
--- a/egs/aspire/s5/local/chain/compare_wer_general.sh
+++ b/egs/aspire/s5/local/chain/compare_wer_general.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer_general.sh exp/chain/tdnn_7b exp/chain/tdnn_lstm_1a
diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
index bd13010c791..fce0e3ec40e 100755
--- a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh
index b5979a3ce6b..0447bebcec0 100755
--- a/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
index cd548142598..70972f7ae37 100755
--- a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
@@ -44,7 +44,7 @@ lang=data/lang_chain
# The iVector-extraction and feature-dumping parts are the same as the standard
# nnet3 setup, and you can skip them by setting "--stage 8" if you have already
# run those things.
-local/nnet3/run_ivector_common.sh --stage $stage --num-data-reps 3|| exit 1;
+local/nnet3/run_ivector_common.sh --stage $stage --num-data-reps ${num_data_reps} || exit 1;
if [ $stage -le 7 ]; then
# Create a version of the lang/ directory that has one state per phone in the
@@ -92,8 +92,8 @@ if [ $stage -le 9 ]; then
# combine the non-hires features for alignments/lattices
rm -rf data/${latgen_train_set}_min${min_seg_len}
- utt_prefix="THISISUNIQUESTRING_"
- spk_prefix="THISISUNIQUESTRING_"
+ utt_prefix="THISISUNIQUESTRING-"
+ spk_prefix="THISISUNIQUESTRING-"
utils/copy_data_dir.sh --spk-prefix "$spk_prefix" --utt-prefix "$utt_prefix" \
data/train data/train_temp_for_lats
utils/data/combine_short_segments.sh \
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh
index 5b35c902354..22c7cc3a867 100755
--- a/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index f98dff5e6fa..eefd8cbccc2 100755
--- a/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/extract_vad_weights.sh b/egs/aspire/s5/local/extract_vad_weights.sh
index 95e36ad12da..19f1bf037b4 100755
--- a/egs/aspire/s5/local/extract_vad_weights.sh
+++ b/egs/aspire/s5/local/extract_vad_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script converts lattices available from a first pass decode into a per-frame weights file
diff --git a/egs/aspire/s5/local/fisher_create_test_lang.sh b/egs/aspire/s5/local/fisher_create_test_lang.sh
index 6739de822aa..dfe590adf16 100755
--- a/egs/aspire/s5/local/fisher_create_test_lang.sh
+++ b/egs/aspire/s5/local/fisher_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/aspire/s5/local/fisher_data_prep.sh b/egs/aspire/s5/local/fisher_data_prep.sh
index f3ad3c3f5bd..900ee385768 100755
--- a/egs/aspire/s5/local/fisher_data_prep.sh
+++ b/egs/aspire/s5/local/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/aspire/s5/local/fisher_prepare_dict.sh b/egs/aspire/s5/local/fisher_prepare_dict.sh
index 577e2869c0b..c577ecf0c01 100755
--- a/egs/aspire/s5/local/fisher_prepare_dict.sh
+++ b/egs/aspire/s5/local/fisher_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/aspire/s5/local/fisher_train_lms.sh b/egs/aspire/s5/local/fisher_train_lms.sh
index d338b82adef..bd2fddc3ac0 100755
--- a/egs/aspire/s5/local/fisher_train_lms.sh
+++ b/egs/aspire/s5/local/fisher_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/aspire/s5/local/generate_uniformly_segmented_data_dir.sh b/egs/aspire/s5/local/generate_uniformly_segmented_data_dir.sh
index 2ceb4a4cf05..4100f500bab 100755
--- a/egs/aspire/s5/local/generate_uniformly_segmented_data_dir.sh
+++ b/egs/aspire/s5/local/generate_uniformly_segmented_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Vijayaditya Peddinti, 2016.
# Apache 2.0.
diff --git a/egs/aspire/s5/local/lattice_to_ctm.sh b/egs/aspire/s5/local/lattice_to_ctm.sh
index e5c88510ac8..aa882de5484 100755
--- a/egs/aspire/s5/local/lattice_to_ctm.sh
+++ b/egs/aspire/s5/local/lattice_to_ctm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/aspire/s5/local/multi_condition/aspire_data_prep.sh b/egs/aspire/s5/local/multi_condition/aspire_data_prep.sh
index 6dd344463ba..b2d988cd2b3 100755
--- a/egs/aspire/s5/local/multi_condition/aspire_data_prep.sh
+++ b/egs/aspire/s5/local/multi_condition/aspire_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti)
# Apache 2.0.
set -e
diff --git a/egs/aspire/s5/local/multi_condition/check_version.sh b/egs/aspire/s5/local/multi_condition/check_version.sh
index 81c415a3d67..d432b4c3835 100755
--- a/egs/aspire/s5/local/multi_condition/check_version.sh
+++ b/egs/aspire/s5/local/multi_condition/check_version.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script to check the tool versions necessary for the aspire recipe
function check_for_bad_sox {
diff --git a/egs/aspire/s5/local/multi_condition/copy_ali_dir.sh b/egs/aspire/s5/local/multi_condition/copy_ali_dir.sh
index 42ea2dc4b9d..9c7dc1637a3 100755
--- a/egs/aspire/s5/local/multi_condition/copy_ali_dir.sh
+++ b/egs/aspire/s5/local/multi_condition/copy_ali_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
diff --git a/egs/aspire/s5/local/multi_condition/decode.sh b/egs/aspire/s5/local/multi_condition/decode.sh
index b09c4780e71..538e581c169 100755
--- a/egs/aspire/s5/local/multi_condition/decode.sh
+++ b/egs/aspire/s5/local/multi_condition/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey).
# 2014 Vijayaditya Peddinti
diff --git a/egs/aspire/s5/local/multi_condition/prep_test_aspire.sh b/egs/aspire/s5/local/multi_condition/prep_test_aspire.sh
index 14cc9a9b04f..3ac2b29d780 100755
--- a/egs/aspire/s5/local/multi_condition/prep_test_aspire.sh
+++ b/egs/aspire/s5/local/multi_condition/prep_test_aspire.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2015. Apache 2.0.
# This script generates the ctm files for dev_aspire, test_aspire and eval_aspire
# for scoring with ASpIRE scoring server.
diff --git a/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh b/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh
index 8297cdee9ca..b94e8b7b344 100755
--- a/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh
+++ b/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# set -e
# Copyright 2014 Johns Hopkins University (Author: Vijayaditya Peddinti)
diff --git a/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh b/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh
index f637c69f7c7..1e6482b8503 100755
--- a/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh
+++ b/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Vijayaditya Peddinti)
# 2015 Tom Ko
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_aalto.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_aalto.sh
index 381a809744c..4e3ff242e08 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_aalto.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_aalto.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the Concert Hall Impulse Responses - Pori, Finland
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_air.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_air.sh
index 731c9e84317..3d57751934e 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_air.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_air.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the Aachen impulse response database
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_c4dm.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_c4dm.sh
index be1628385f4..ff2ae6eee55 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_c4dm.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_c4dm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# Room impulse responses from Center for Digital Music, Queen Mary University of London
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_mardy.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_mardy.sh
index df365c9c134..2c77a71a022 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_mardy.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_mardy.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads Multichannel Acoustic Reverberation Database at
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_openair.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_openair.sh
index a897671213b..8cdfe596dda 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_openair.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_openair.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the impulse responses from http://www.openairlib.net/
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_rvb2014.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_rvb2014.sh
index 25617b22fdb..346bc8bf785 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_rvb2014.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_rvb2014.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the impulse responses and noise files from the
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_rwcp.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_rwcp.sh
index f755a725fe4..c28e6f79952 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_rwcp.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_rwcp.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the RWCP impulse responses and ambient noise
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_varechoic.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_varechoic.sh
index 6e2956e9a37..ee2ee848aa8 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_varechoic.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_varechoic.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the impulse responses from the Varechoic room
diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
index 9345dfc92ef..f8a201eef46 100755
--- a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
+++ b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#set -e
# this script is based on local/online/run_nnet2_comman.sh
# but it operates on corrupted training/dev/test data sets
diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh
index 56b2de399f2..e9cb025bb5d 100755
--- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh
+++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the "multi-splice" version of the online-nnet2 training script.
# It's currently the best recipe for aspire.
diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh
index 129b1402cf4..43e5108019c 100755
--- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh
+++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is run_nnet2_ms_disc.sh but with 4 jobs not 2 (and double the learning rate).
diff --git a/egs/aspire/s5/local/nnet3/decode.sh b/egs/aspire/s5/local/nnet3/decode.sh
index 8f965c51cf1..1eb599441fb 100755
--- a/egs/aspire/s5/local/nnet3/decode.sh
+++ b/egs/aspire/s5/local/nnet3/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script generates the ctm files for dev_aspire, test_aspire and eval_aspire
diff --git a/egs/aspire/s5/local/nnet3/decode_online.sh b/egs/aspire/s5/local/nnet3/decode_online.sh
index 8a51e36b0a5..1e005820c59 100755
--- a/egs/aspire/s5/local/nnet3/decode_online.sh
+++ b/egs/aspire/s5/local/nnet3/decode_online.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script does online decoding, unlike local/nnet3/decode.sh which does 2-pass decoding with
diff --git a/egs/aspire/s5/local/nnet3/run_autoencoder.sh b/egs/aspire/s5/local/nnet3/run_autoencoder.sh
index 3d16a97ab85..e1c94327cb0 100755
--- a/egs/aspire/s5/local/nnet3/run_autoencoder.sh
+++ b/egs/aspire/s5/local/nnet3/run_autoencoder.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is an example to show a "tdnn" system in raw nnet configuration
# i.e. without a transition model
diff --git a/egs/aspire/s5/local/nnet3/run_blstm.sh b/egs/aspire/s5/local/nnet3/run_blstm.sh
index 8fe53fa4db1..87dde580333 100755
--- a/egs/aspire/s5/local/nnet3/run_blstm.sh
+++ b/egs/aspire/s5/local/nnet3/run_blstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# based on egs/fisher_swbd/s5/local/nnet3/run_lstm.sh
diff --git a/egs/aspire/s5/local/nnet3/run_ivector_common.sh b/egs/aspire/s5/local/nnet3/run_ivector_common.sh
index ea226c230af..e867933477a 100755
--- a/egs/aspire/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/aspire/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#set -e
# this script is based on local/multicondition/run_nnet2_common.sh
# minor corrections were made to dir names for nnet3
diff --git a/egs/aspire/s5/local/nnet3/run_tdnn.sh b/egs/aspire/s5/local/nnet3/run_tdnn.sh
index 8e6a45ccbb4..f32c2a76038 100755
--- a/egs/aspire/s5/local/nnet3/run_tdnn.sh
+++ b/egs/aspire/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is a script to train the nnet3 TDNN acoustic model
diff --git a/egs/aspire/s5/local/nnet3/segment_and_decode.sh b/egs/aspire/s5/local/nnet3/segment_and_decode.sh
index e8917d091e2..80394ae15f3 100755
--- a/egs/aspire/s5/local/nnet3/segment_and_decode.sh
+++ b/egs/aspire/s5/local/nnet3/segment_and_decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script generates the ctm files for dev_aspire, test_aspire and eval_aspire
diff --git a/egs/aspire/s5/local/run_asr_segmentation.sh b/egs/aspire/s5/local/run_asr_segmentation.sh
index 095e47e99de..47977597c9b 100755
--- a/egs/aspire/s5/local/run_asr_segmentation.sh
+++ b/egs/aspire/s5/local/run_asr_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2017 Vimal Manohar
diff --git a/egs/aspire/s5/local/run_data_cleaning.sh b/egs/aspire/s5/local/run_data_cleaning.sh
index 68b752ad577..7154da73e27 100755
--- a/egs/aspire/s5/local/run_data_cleaning.sh
+++ b/egs/aspire/s5/local/run_data_cleaning.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script shows how you can do data-cleaning, and exclude data that has a
diff --git a/egs/aspire/s5/local/score.sh b/egs/aspire/s5/local/score.sh
index 91f8e77bc99..65e0adf0fd4 100755
--- a/egs/aspire/s5/local/score.sh
+++ b/egs/aspire/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/aspire/s5/local/score_aspire.sh b/egs/aspire/s5/local/score_aspire.sh
index 9c08a6c85d1..d9f7762cb08 100755
--- a/egs/aspire/s5/local/score_aspire.sh
+++ b/egs/aspire/s5/local/score_aspire.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script generates the ctm files, filters and scores them if an stm file is available
diff --git a/egs/aspire/s5/local/score_stm.sh b/egs/aspire/s5/local/score_stm.sh
index 7f559f7dd79..15257491eeb 100755
--- a/egs/aspire/s5/local/score_stm.sh
+++ b/egs/aspire/s5/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_asr_sad_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
index 438cd1f1d5e..8c9d521592b 100755
--- a/egs/aspire/s5/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
+++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2018 Vimal Manohar
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index 80f9840f160..c789ff11630 100755
--- a/egs/aspire/s5/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2018 Vimal Manohar
diff --git a/egs/aspire/s5/run.sh b/egs/aspire/s5/run.sh
index 851363a7532..ee7b7762383 100755
--- a/egs/aspire/s5/run.sh
+++ b/egs/aspire/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# ASpIRE submission, based on Fisher-english GMM-HMM system
# (March 2015)
diff --git a/egs/aurora4/s5/local/aurora4_data_prep.sh b/egs/aurora4/s5/local/aurora4_data_prep.sh
index 6a42c9e543c..7c669b258ce 100755
--- a/egs/aurora4/s5/local/aurora4_data_prep.sh
+++ b/egs/aurora4/s5/local/aurora4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/aurora4/s5/local/aurora4_format_data.sh b/egs/aurora4/s5/local/aurora4_format_data.sh
index 0b94f7f796d..668fc8e6305 100755
--- a/egs/aurora4/s5/local/aurora4_format_data.sh
+++ b/egs/aurora4/s5/local/aurora4_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/aurora4/s5/local/chain/compare_wer.sh b/egs/aurora4/s5/local/chain/compare_wer.sh
index 91701cad9e9..5f47c11420f 100755
--- a/egs/aurora4/s5/local/chain/compare_wer.sh
+++ b/egs/aurora4/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/aurora4/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aurora4/s5/local/chain/tuning/run_tdnn_1a.sh
index 8bc69f9c8cf..84f6eab457d 100755
--- a/egs/aurora4/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aurora4/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1a is same as 1h setup in WSJ
diff --git a/egs/aurora4/s5/local/cstr_wsj_data_prep.sh b/egs/aurora4/s5/local/cstr_wsj_data_prep.sh
index 35582646d95..a5c327ec34c 100755
--- a/egs/aurora4/s5/local/cstr_wsj_data_prep.sh
+++ b/egs/aurora4/s5/local/cstr_wsj_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/aurora4/s5/local/cstr_wsj_extend_dict.sh b/egs/aurora4/s5/local/cstr_wsj_extend_dict.sh
index b2a9faad704..9447cd1249b 100755
--- a/egs/aurora4/s5/local/cstr_wsj_extend_dict.sh
+++ b/egs/aurora4/s5/local/cstr_wsj_extend_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script builds a larger word-list and dictionary
# than used for the LMs supplied with the WSJ corpus.
diff --git a/egs/aurora4/s5/local/generate_example_kws.sh b/egs/aurora4/s5/local/generate_example_kws.sh
index 2c849438192..ecba20efbf1 100755
--- a/egs/aurora4/s5/local/generate_example_kws.sh
+++ b/egs/aurora4/s5/local/generate_example_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/aurora4/s5/local/kws_data_prep.sh b/egs/aurora4/s5/local/kws_data_prep.sh
index 5222a88c9ef..fecfda52473 100755
--- a/egs/aurora4/s5/local/kws_data_prep.sh
+++ b/egs/aurora4/s5/local/kws_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/aurora4/s5/local/nnet/run_dnn.sh b/egs/aurora4/s5/local/nnet/run_dnn.sh
index 680a6ca31f0..5deb2c805bc 100755
--- a/egs/aurora4/s5/local/nnet/run_dnn.sh
+++ b/egs/aurora4/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/aurora4/s5/local/nnet2/run_5b.sh b/egs/aurora4/s5/local/nnet2/run_5b.sh
index 676f340ecd1..6666eee769c 100755
--- a/egs/aurora4/s5/local/nnet2/run_5b.sh
+++ b/egs/aurora4/s5/local/nnet2/run_5b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/aurora4/s5/local/nnet2/run_5c.sh b/egs/aurora4/s5/local/nnet2/run_5c.sh
index 27f0db36f67..10b658b6da6 100755
--- a/egs/aurora4/s5/local/nnet2/run_5c.sh
+++ b/egs/aurora4/s5/local/nnet2/run_5c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is neural net training on top of adapted 40-dimensional features.
#
diff --git a/egs/aurora4/s5/local/nnet3/run_ivector_common.sh b/egs/aurora4/s5/local/nnet3/run_ivector_common.sh
index a489a273c6b..e13ed59b60e 100755
--- a/egs/aurora4/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/aurora4/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/aurora4/s5/local/run_basis_fmllr.sh b/egs/aurora4/s5/local/run_basis_fmllr.sh
index 4e412535dd5..f7ee77b5506 100755
--- a/egs/aurora4/s5/local/run_basis_fmllr.sh
+++ b/egs/aurora4/s5/local/run_basis_fmllr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/aurora4/s5/local/run_mmi_tri2b.sh b/egs/aurora4/s5/local/run_mmi_tri2b.sh
index 8a4d03c59c4..22b670c144e 100755
--- a/egs/aurora4/s5/local/run_mmi_tri2b.sh
+++ b/egs/aurora4/s5/local/run_mmi_tri2b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/aurora4/s5/local/run_mmi_tri4b.sh b/egs/aurora4/s5/local/run_mmi_tri4b.sh
index db34f8e1d84..cac895720a6 100755
--- a/egs/aurora4/s5/local/run_mmi_tri4b.sh
+++ b/egs/aurora4/s5/local/run_mmi_tri4b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
steps/make_denlats.sh --nj 30 --sub-split 30 --cmd "$train_cmd" \
diff --git a/egs/aurora4/s5/local/run_nnet_cpu.sh b/egs/aurora4/s5/local/run_nnet_cpu.sh
index c72e521f18b..8693d7f7619 100755
--- a/egs/aurora4/s5/local/run_nnet_cpu.sh
+++ b/egs/aurora4/s5/local/run_nnet_cpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/aurora4/s5/local/run_raw_fmllr.sh b/egs/aurora4/s5/local/run_raw_fmllr.sh
index c4847a93f27..26989eb6982 100644
--- a/egs/aurora4/s5/local/run_raw_fmllr.sh
+++ b/egs/aurora4/s5/local/run_raw_fmllr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/align_raw_fmllr.sh --nj 10 --cmd "$train_cmd" --use-graphs true \
diff --git a/egs/aurora4/s5/local/run_rnnlms_sgmm5b.sh b/egs/aurora4/s5/local/run_rnnlms_sgmm5b.sh
index 67fcee50a93..867294d2e77 100755
--- a/egs/aurora4/s5/local/run_rnnlms_sgmm5b.sh
+++ b/egs/aurora4/s5/local/run_rnnlms_sgmm5b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
for test in dev93 eval92; do
diff --git a/egs/aurora4/s5/local/run_rnnlms_tri3b.sh b/egs/aurora4/s5/local/run_rnnlms_tri3b.sh
index fac8842f960..32d5f55af91 100755
--- a/egs/aurora4/s5/local/run_rnnlms_tri3b.sh
+++ b/egs/aurora4/s5/local/run_rnnlms_tri3b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/aurora4/s5/local/run_sgmm2.sh b/egs/aurora4/s5/local/run_sgmm2.sh
index 2eb70785bcb..c129ff47f2e 100755
--- a/egs/aurora4/s5/local/run_sgmm2.sh
+++ b/egs/aurora4/s5/local/run_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is invoked from ../run.sh
# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.
diff --git a/egs/aurora4/s5/local/score.sh b/egs/aurora4/s5/local/score.sh
index abd8149a672..332f038c575 100755
--- a/egs/aurora4/s5/local/score.sh
+++ b/egs/aurora4/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/aurora4/s5/local/score_combine.sh b/egs/aurora4/s5/local/score_combine.sh
index 65caab06ecc..c4d3c13886a 100755
--- a/egs/aurora4/s5/local/score_combine.sh
+++ b/egs/aurora4/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal
diff --git a/egs/aurora4/s5/local/score_mbr.sh b/egs/aurora4/s5/local/score_mbr.sh
index 04b84ccce5a..8c752368906 100755
--- a/egs/aurora4/s5/local/score_mbr.sh
+++ b/egs/aurora4/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/aurora4/s5/local/wsj_prepare_dict.sh b/egs/aurora4/s5/local/wsj_prepare_dict.sh
index 2bbea907873..8bde9807c03 100755
--- a/egs/aurora4/s5/local/wsj_prepare_dict.sh
+++ b/egs/aurora4/s5/local/wsj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/aurora4/s5/run.sh b/egs/aurora4/s5/run.sh
index f7eb67580ae..730194fb8b9 100755
--- a/egs/aurora4/s5/run.sh
+++ b/egs/aurora4/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
diff --git a/egs/babel/s5/local/CHECKPOINT.sh b/egs/babel/s5/local/CHECKPOINT.sh
index 91b64d7fe1a..b8bdc48a9aa 100755
--- a/egs/babel/s5/local/CHECKPOINT.sh
+++ b/egs/babel/s5/local/CHECKPOINT.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
function GETAPPROVAL {
until false ; do
diff --git a/egs/babel/s5/local/arpa2G.sh b/egs/babel/s5/local/arpa2G.sh
index f037caf0d7b..4209388f61f 100755
--- a/egs/babel/s5/local/arpa2G.sh
+++ b/egs/babel/s5/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5/local/buildSRILM.sh b/egs/babel/s5/local/buildSRILM.sh
index 0633789a6a9..f113c322444 100755
--- a/egs/babel/s5/local/buildSRILM.sh
+++ b/egs/babel/s5/local/buildSRILM.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
targetDir=$1
diff --git a/egs/babel/s5/local/check_models.sh b/egs/babel/s5/local/check_models.sh
index d02fc4e561a..4389344ed85 100755
--- a/egs/babel/s5/local/check_models.sh
+++ b/egs/babel/s5/local/check_models.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
check_model () {
diff --git a/egs/babel/s5/local/check_wers.sh b/egs/babel/s5/local/check_wers.sh
index ebd6bb28790..1204fd459e7 100755
--- a/egs/babel/s5/local/check_wers.sh
+++ b/egs/babel/s5/local/check_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
diff --git a/egs/babel/s5/local/create_shadow_dataset.sh b/egs/babel/s5/local/create_shadow_dataset.sh
index d275b9aaca6..ce2215254e3 100755
--- a/egs/babel/s5/local/create_shadow_dataset.sh
+++ b/egs/babel/s5/local/create_shadow_dataset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University
# Apache 2.0.
diff --git a/egs/babel/s5/local/cstr_wsj_data_prep.sh b/egs/babel/s5/local/cstr_wsj_data_prep.sh
index 35582646d95..a5c327ec34c 100755
--- a/egs/babel/s5/local/cstr_wsj_data_prep.sh
+++ b/egs/babel/s5/local/cstr_wsj_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/babel/s5/local/cstr_wsj_extend_dict.sh b/egs/babel/s5/local/cstr_wsj_extend_dict.sh
index b2a9faad704..9447cd1249b 100755
--- a/egs/babel/s5/local/cstr_wsj_extend_dict.sh
+++ b/egs/babel/s5/local/cstr_wsj_extend_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script builds a larger word-list and dictionary
# than used for the LMs supplied with the WSJ corpus.
diff --git a/egs/babel/s5/local/generate_example_kws.sh b/egs/babel/s5/local/generate_example_kws.sh
index 2c849438192..ecba20efbf1 100755
--- a/egs/babel/s5/local/generate_example_kws.sh
+++ b/egs/babel/s5/local/generate_example_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/generate_proxy_keywords.sh b/egs/babel/s5/local/generate_proxy_keywords.sh
index fca2326b278..67111a75165 100755
--- a/egs/babel/s5/local/generate_proxy_keywords.sh
+++ b/egs/babel/s5/local/generate_proxy_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/get_syllable_text.sh b/egs/babel/s5/local/get_syllable_text.sh
index 97d2af7ed65..b98868c8b16 100755
--- a/egs/babel/s5/local/get_syllable_text.sh
+++ b/egs/babel/s5/local/get_syllable_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University 2013 (author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_data_prep.sh b/egs/babel/s5/local/kws_data_prep.sh
index 909e9b2596c..8b52ccc4d20 100755
--- a/egs/babel/s5/local/kws_data_prep.sh
+++ b/egs/babel/s5/local/kws_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_data_prep_syllables.sh b/egs/babel/s5/local/kws_data_prep_syllables.sh
index c6245e52c9e..221647e8d60 100755
--- a/egs/babel/s5/local/kws_data_prep_syllables.sh
+++ b/egs/babel/s5/local/kws_data_prep_syllables.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_gen_oracle_lattices.sh b/egs/babel/s5/local/kws_gen_oracle_lattices.sh
index aa9e22cca96..87a5b5df435 100755
--- a/egs/babel/s5/local/kws_gen_oracle_lattices.sh
+++ b/egs/babel/s5/local/kws_gen_oracle_lattices.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_oracle.sh b/egs/babel/s5/local/kws_oracle.sh
index 44334ba1413..2ba7b070546 100755
--- a/egs/babel/s5/local/kws_oracle.sh
+++ b/egs/babel/s5/local/kws_oracle.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
# 2013 Johns Hopkins University
diff --git a/egs/babel/s5/local/kws_score_f4de.sh b/egs/babel/s5/local/kws_score_f4de.sh
index 5501bbf84fe..fd194851f96 100755
--- a/egs/babel/s5/local/kws_score_f4de.sh
+++ b/egs/babel/s5/local/kws_score_f4de.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_search.sh b/egs/babel/s5/local/kws_search.sh
index 77fd983ebc1..2dc217fc1cd 100755
--- a/egs/babel/s5/local/kws_search.sh
+++ b/egs/babel/s5/local/kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
@@ -144,7 +144,7 @@ if [ $stage -le 2 ]; then
echo "Writing normalized results"
$cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_normalized.LMWT.log \
set -e ';' set -o pipefail ';'\
- cat ${kwsoutdir}_LMWT/result.* \| \
+ gunzip -c ${kwsoutdir}_LMWT/result.* \| \
utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \
--segments=$datadir/segments --normalize=true --duptime=$duptime --remove-dup=true\
--map-utter=$kwsdatadir/utter_map --digits=3 \
@@ -155,7 +155,7 @@ if [ $stage -le 3 ]; then
echo "Writing unnormalized results"
$cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_unnormalized.LMWT.log \
set -e ';' set -o pipefail ';'\
- cat ${kwsoutdir}_LMWT/result.* \| \
+ gunzip -c ${kwsoutdir}_LMWT/result.* \| \
utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \
--segments=$datadir/segments --normalize=false --duptime=$duptime --remove-dup=true\
--map-utter=$kwsdatadir/utter_map \
diff --git a/egs/babel/s5/local/kws_setup.sh b/egs/babel/s5/local/kws_setup.sh
index f49267c233c..c33bb6350ae 100755
--- a/egs/babel/s5/local/kws_setup.sh
+++ b/egs/babel/s5/local/kws_setup.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5/local/lattice_to_ctm.sh b/egs/babel/s5/local/lattice_to_ctm.sh
index 9bf1b3ca882..a3f0024e8bc 100755
--- a/egs/babel/s5/local/lattice_to_ctm.sh
+++ b/egs/babel/s5/local/lattice_to_ctm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5/local/lattice_to_ctm_syllable.sh b/egs/babel/s5/local/lattice_to_ctm_syllable.sh
index 7165a7a04e5..4a51ef50397 100755
--- a/egs/babel/s5/local/lattice_to_ctm_syllable.sh
+++ b/egs/babel/s5/local/lattice_to_ctm_syllable.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5/local/make_ecf_subset.sh b/egs/babel/s5/local/make_ecf_subset.sh
index be81cc21a5d..af8aac116bc 100755
--- a/egs/babel/s5/local/make_ecf_subset.sh
+++ b/egs/babel/s5/local/make_ecf_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5/local/make_ffv.sh b/egs/babel/s5/local/make_ffv.sh
index 3820e4cb659..8036d606c9b 100755
--- a/egs/babel/s5/local/make_ffv.sh
+++ b/egs/babel/s5/local/make_ffv.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
# Bagher BabaAli
@@ -151,7 +151,7 @@ for ((n=1; n<=nj; n++)); do
done
cat <<'EOF' > $ffv_script
-#!/bin/bash
+#!/usr/bin/env bash
# script for execution of ffv
flen=0.01
sfreq=8000
@@ -189,7 +189,7 @@ fi
# script file in the experimental directory. Quotes around 'EOF' disable any
# interpretation in the here-doc.
cat <<'EOF' > $expdir/convert.sh
-#!/bin/bash
+#!/usr/bin/env bash
ffv_flist=$1
scpfile=$2
[ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1;
diff --git a/egs/babel/s5/local/make_lexicon_subset.sh b/egs/babel/s5/local/make_lexicon_subset.sh
index 62464f3c461..82ce6a8af78 100755
--- a/egs/babel/s5/local/make_lexicon_subset.sh
+++ b/egs/babel/s5/local/make_lexicon_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "$0 $@" # Print the command line for logging
diff --git a/egs/babel/s5/local/make_pitch.sh b/egs/babel/s5/local/make_pitch.sh
index f3597f504dd..0758e414e24 100755
--- a/egs/babel/s5/local/make_pitch.sh
+++ b/egs/babel/s5/local/make_pitch.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
# Bagher BabaAli
@@ -158,7 +158,7 @@ fi
# script file in the experimental directory. Quotes around 'EOF' disable any
# interpretation in the here-doc.
cat <<'EOF' > $expdir/convert.sh
-#!/bin/bash
+#!/usr/bin/env bash
sacc_flist=$1
scpfile=$2
[ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1;
diff --git a/egs/babel/s5/local/make_syllable_lexicon.sh b/egs/babel/s5/local/make_syllable_lexicon.sh
index 118845982b9..e50e72929f1 100755
--- a/egs/babel/s5/local/make_syllable_lexicon.sh
+++ b/egs/babel/s5/local/make_syllable_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
help="Usage: $(basename $0)
diff --git a/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh b/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh
index 5ef283af54e..c35ae50c102 100755
--- a/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh
+++ b/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script was copied from ../10hSystem/local (Author: Guoguo Chen?)
# It will be modified to make it somewhat more reusable
diff --git a/egs/babel/s5/local/score_combine.sh b/egs/babel/s5/local/score_combine.sh
index 42d9cfc0918..d77879541d4 100755
--- a/egs/babel/s5/local/score_combine.sh
+++ b/egs/babel/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Arnab Ghoshal
# Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
diff --git a/egs/babel/s5/local/score_map.sh b/egs/babel/s5/local/score_map.sh
index 94c31acc348..c44cc19c94f 100755
--- a/egs/babel/s5/local/score_map.sh
+++ b/egs/babel/s5/local/score_map.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/babel/s5/local/score_mbr.sh b/egs/babel/s5/local/score_mbr.sh
index 1c39830b4c7..b99568668a1 100755
--- a/egs/babel/s5/local/score_mbr.sh
+++ b/egs/babel/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/babel/s5/local/score_sctk.sh b/egs/babel/s5/local/score_sctk.sh
index cef470421a0..b210c8c5ec5 100755
--- a/egs/babel/s5/local/score_sctk.sh
+++ b/egs/babel/s5/local/score_sctk.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur) 2012-2013. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5/local/score_sctk_prune.sh b/egs/babel/s5/local/score_sctk_prune.sh
index a6eca9fd071..5ad8e175fd9 100755
--- a/egs/babel/s5/local/score_sctk_prune.sh
+++ b/egs/babel/s5/local/score_sctk_prune.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur) 2012-2013. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5/local/score_stm.sh b/egs/babel/s5/local/score_stm.sh
index 6a43c718d3b..cecfa3f8d2b 100755
--- a/egs/babel/s5/local/score_stm.sh
+++ b/egs/babel/s5/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5/local/shadow_set_kws_search.sh b/egs/babel/s5/local/shadow_set_kws_search.sh
index 733a84d4acf..6d4498fb5d9 100755
--- a/egs/babel/s5/local/shadow_set_kws_search.sh
+++ b/egs/babel/s5/local/shadow_set_kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5/local/train_lms_srilm.sh b/egs/babel/s5/local/train_lms_srilm.sh
index e35a4322364..814b23cd175 100755
--- a/egs/babel/s5/local/train_lms_srilm.sh
+++ b/egs/babel/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/babel/s5/local/train_mmi_sgmm2.sh b/egs/babel/s5/local/train_mmi_sgmm2.sh
index 2d3d0b5bf49..e8cc4132f5c 100755
--- a/egs/babel/s5/local/train_mmi_sgmm2.sh
+++ b/egs/babel/s5/local/train_mmi_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# MMI training (or optionally boosted MMI, if you give the --boost option),
diff --git a/egs/babel/s5/make_release.sh b/egs/babel/s5/make_release.sh
index 56fdc068442..7dbc0f275a6 100755
--- a/egs/babel/s5/make_release.sh
+++ b/egs/babel/s5/make_release.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
lp=
lr=
diff --git a/egs/babel/s5/run-1-main.sh b/egs/babel/s5/run-1-main.sh
index a156661c1f3..3ed870a5977 100755
--- a/egs/babel/s5/run-1-main.sh
+++ b/egs/babel/s5/run-1-main.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5/run-2a-nnet.sh b/egs/babel/s5/run-2a-nnet.sh
index 00a3b44fe2e..54d6c343b2b 100755
--- a/egs/babel/s5/run-2a-nnet.sh
+++ b/egs/babel/s5/run-2a-nnet.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
diff --git a/egs/babel/s5/run-2b-bnf.sh b/egs/babel/s5/run-2b-bnf.sh
index 1176834ce70..b3de0b014d1 100755
--- a/egs/babel/s5/run-2b-bnf.sh
+++ b/egs/babel/s5/run-2b-bnf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the "final" version of the script that runs trains the bottleneck system.
# It is to be run after run.sh (the new version, that uses the same number of phases
diff --git a/egs/babel/s5/run-6-combine.sh b/egs/babel/s5/run-6-combine.sh
index 92d749ca486..2cbec7a9816 100755
--- a/egs/babel/s5/run-6-combine.sh
+++ b/egs/babel/s5/run-6-combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
diff --git a/egs/babel/s5/steps_BNF/build_nnet_pfile.sh b/egs/babel/s5/steps_BNF/build_nnet_pfile.sh
index 20e9754bea7..99b373d346a 100755
--- a/egs/babel/s5/steps_BNF/build_nnet_pfile.sh
+++ b/egs/babel/s5/steps_BNF/build_nnet_pfile.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Carnegie Mellon University (Author: Yajie Miao)
# Apache 2.0
diff --git a/egs/babel/s5/steps_BNF/make_bnf_feat.sh b/egs/babel/s5/steps_BNF/make_bnf_feat.sh
index 52f49475076..691d53d2c2b 100755
--- a/egs/babel/s5/steps_BNF/make_bnf_feat.sh
+++ b/egs/babel/s5/steps_BNF/make_bnf_feat.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Carnegie Mellon University (Author: Yajie Miao)
# Apache 2.0
diff --git a/egs/babel/s5/steps_BNF/make_denlats_sgmm2.sh b/egs/babel/s5/steps_BNF/make_denlats_sgmm2.sh
index c3d92561089..7402e8499e7 100755
--- a/egs/babel/s5/steps_BNF/make_denlats_sgmm2.sh
+++ b/egs/babel/s5/steps_BNF/make_denlats_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Carnegie Mellon University (Author: Yajie Miao)
diff --git a/egs/babel/s5b/local/CHECKPOINT.sh b/egs/babel/s5b/local/CHECKPOINT.sh
index 91b64d7fe1a..b8bdc48a9aa 100755
--- a/egs/babel/s5b/local/CHECKPOINT.sh
+++ b/egs/babel/s5b/local/CHECKPOINT.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
function GETAPPROVAL {
until false ; do
diff --git a/egs/babel/s5b/local/apply_g2p.sh b/egs/babel/s5b/local/apply_g2p.sh
index f47274cb21c..2342b081893 100755
--- a/egs/babel/s5b/local/apply_g2p.sh
+++ b/egs/babel/s5b/local/apply_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/arpa2G.sh b/egs/babel/s5b/local/arpa2G.sh
index db816abc7a5..7d427314dd6 100755
--- a/egs/babel/s5b/local/arpa2G.sh
+++ b/egs/babel/s5b/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5b/local/best_path_weights.sh b/egs/babel/s5b/local/best_path_weights.sh
index 8e88a3610a4..4ec5dc47a67 100755
--- a/egs/babel/s5b/local/best_path_weights.sh
+++ b/egs/babel/s5b/local/best_path_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5b/local/buildSRILM.sh b/egs/babel/s5b/local/buildSRILM.sh
index 0633789a6a9..f113c322444 100755
--- a/egs/babel/s5b/local/buildSRILM.sh
+++ b/egs/babel/s5b/local/buildSRILM.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
targetDir=$1
diff --git a/egs/babel/s5b/local/check_models.sh b/egs/babel/s5b/local/check_models.sh
index d02fc4e561a..4389344ed85 100755
--- a/egs/babel/s5b/local/check_models.sh
+++ b/egs/babel/s5b/local/check_models.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
check_model () {
diff --git a/egs/babel/s5b/local/check_wers.sh b/egs/babel/s5b/local/check_wers.sh
index ebd6bb28790..1204fd459e7 100755
--- a/egs/babel/s5b/local/check_wers.sh
+++ b/egs/babel/s5b/local/check_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
diff --git a/egs/babel/s5b/local/create_shadow_dataset.sh b/egs/babel/s5b/local/create_shadow_dataset.sh
index 6783ee49770..2b65392b096 100755
--- a/egs/babel/s5b/local/create_shadow_dataset.sh
+++ b/egs/babel/s5b/local/create_shadow_dataset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University
# Apache 2.0.
diff --git a/egs/babel/s5b/local/extend_lexicon.sh b/egs/babel/s5b/local/extend_lexicon.sh
index 2250d4f5dcf..3d2724609a0 100755
--- a/egs/babel/s5b/local/extend_lexicon.sh
+++ b/egs/babel/s5b/local/extend_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (authors: Daniel Povey, Yenda Trmal)
# 2014 Guoguo Chen
diff --git a/egs/babel/s5b/local/generate_confusion_matrix.sh b/egs/babel/s5b/local/generate_confusion_matrix.sh
index 6529057db9e..b52a6a2b271 100755
--- a/egs/babel/s5b/local/generate_confusion_matrix.sh
+++ b/egs/babel/s5b/local/generate_confusion_matrix.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/generate_example_kws.sh b/egs/babel/s5b/local/generate_example_kws.sh
index 2c849438192..ecba20efbf1 100755
--- a/egs/babel/s5b/local/generate_example_kws.sh
+++ b/egs/babel/s5b/local/generate_example_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/generate_proxy_keywords.sh b/egs/babel/s5b/local/generate_proxy_keywords.sh
index 8562953efa4..324044a604a 100755
--- a/egs/babel/s5b/local/generate_proxy_keywords.sh
+++ b/egs/babel/s5b/local/generate_proxy_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Guoguo Chen
# Apache 2.0.
diff --git a/egs/babel/s5b/local/get_syllable_text.sh b/egs/babel/s5b/local/get_syllable_text.sh
index 97d2af7ed65..b98868c8b16 100755
--- a/egs/babel/s5b/local/get_syllable_text.sh
+++ b/egs/babel/s5b/local/get_syllable_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University 2013 (author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_combine.sh b/egs/babel/s5b/local/kws_combine.sh
index 32ec93fa49e..7f55f798aaa 100755
--- a/egs/babel/s5b/local/kws_combine.sh
+++ b/egs/babel/s5b/local/kws_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
diff --git a/egs/babel/s5b/local/kws_data_prep.sh b/egs/babel/s5b/local/kws_data_prep.sh
index 909e9b2596c..8b52ccc4d20 100755
--- a/egs/babel/s5b/local/kws_data_prep.sh
+++ b/egs/babel/s5b/local/kws_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_data_prep_proxy.sh b/egs/babel/s5b/local/kws_data_prep_proxy.sh
index 787cb009960..5a640dcc890 100755
--- a/egs/babel/s5b/local/kws_data_prep_proxy.sh
+++ b/egs/babel/s5b/local/kws_data_prep_proxy.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Guoguo Chen
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_data_prep_syllables.sh b/egs/babel/s5b/local/kws_data_prep_syllables.sh
index c6245e52c9e..221647e8d60 100755
--- a/egs/babel/s5b/local/kws_data_prep_syllables.sh
+++ b/egs/babel/s5b/local/kws_data_prep_syllables.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_gen_oracle_lattices.sh b/egs/babel/s5b/local/kws_gen_oracle_lattices.sh
index aa9e22cca96..87a5b5df435 100755
--- a/egs/babel/s5b/local/kws_gen_oracle_lattices.sh
+++ b/egs/babel/s5b/local/kws_gen_oracle_lattices.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_oracle.sh b/egs/babel/s5b/local/kws_oracle.sh
index 44334ba1413..2ba7b070546 100755
--- a/egs/babel/s5b/local/kws_oracle.sh
+++ b/egs/babel/s5b/local/kws_oracle.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
# 2013 Johns Hopkins University
diff --git a/egs/babel/s5b/local/kws_score_f4de.sh b/egs/babel/s5b/local/kws_score_f4de.sh
index d761e080c1c..ffb77e336b0 100755
--- a/egs/babel/s5b/local/kws_score_f4de.sh
+++ b/egs/babel/s5b/local/kws_score_f4de.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_search.sh b/egs/babel/s5b/local/kws_search.sh
index 4b275048e0e..359cd5c74d0 100755
--- a/egs/babel/s5b/local/kws_search.sh
+++ b/egs/babel/s5b/local/kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_setup.sh b/egs/babel/s5b/local/kws_setup.sh
index f1036f100de..dea9d605e21 100755
--- a/egs/babel/s5b/local/kws_setup.sh
+++ b/egs/babel/s5b/local/kws_setup.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/lattice_to_ctm.sh b/egs/babel/s5b/local/lattice_to_ctm.sh
index 08a1b5889a7..5169b275195 100755
--- a/egs/babel/s5b/local/lattice_to_ctm.sh
+++ b/egs/babel/s5b/local/lattice_to_ctm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5b/local/lattice_to_ctm_syllable.sh b/egs/babel/s5b/local/lattice_to_ctm_syllable.sh
index 7165a7a04e5..4a51ef50397 100755
--- a/egs/babel/s5b/local/lattice_to_ctm_syllable.sh
+++ b/egs/babel/s5b/local/lattice_to_ctm_syllable.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5b/local/make_corpus_subset.sh b/egs/babel/s5b/local/make_corpus_subset.sh
index add194d48e8..acd5e91a18b 100755
--- a/egs/babel/s5b/local/make_corpus_subset.sh
+++ b/egs/babel/s5b/local/make_corpus_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/make_ecf_subset.sh b/egs/babel/s5b/local/make_ecf_subset.sh
index 53bddcbc839..bc776d8446e 100755
--- a/egs/babel/s5b/local/make_ecf_subset.sh
+++ b/egs/babel/s5b/local/make_ecf_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/make_lexicon_subset.sh b/egs/babel/s5b/local/make_lexicon_subset.sh
index 924a22866e7..c66e1164ae7 100755
--- a/egs/babel/s5b/local/make_lexicon_subset.sh
+++ b/egs/babel/s5b/local/make_lexicon_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "$0 $@" # Print the command line for logging
diff --git a/egs/babel/s5b/local/make_syllable_lexicon.sh b/egs/babel/s5b/local/make_syllable_lexicon.sh
index 118845982b9..e50e72929f1 100755
--- a/egs/babel/s5b/local/make_syllable_lexicon.sh
+++ b/egs/babel/s5b/local/make_syllable_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
help="Usage: $(basename $0)
diff --git a/egs/babel/s5b/local/nist_eval/create_compound_set.sh b/egs/babel/s5b/local/nist_eval/create_compound_set.sh
index 3b35ad11e29..5582536b093 100755
--- a/egs/babel/s5b/local/nist_eval/create_compound_set.sh
+++ b/egs/babel/s5b/local/nist_eval/create_compound_set.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#Simple script to create compound set info that will allow for more automatized
#work with the shadow set.
diff --git a/egs/babel/s5b/local/nist_eval/export_systems.sh b/egs/babel/s5b/local/nist_eval/export_systems.sh
index 7e514bcc077..7ba72662f6b 100755
--- a/egs/babel/s5b/local/nist_eval/export_systems.sh
+++ b/egs/babel/s5b/local/nist_eval/export_systems.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel/s5b/local/nist_eval/make_release.sh b/egs/babel/s5b/local/nist_eval/make_release.sh
index bb74188b4d9..e32ad82d341 100755
--- a/egs/babel/s5b/local/nist_eval/make_release.sh
+++ b/egs/babel/s5b/local/nist_eval/make_release.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
team=RADICAL
corpusid=
diff --git a/egs/babel/s5b/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5b/local/nnet2/get_egs_semi_supervised.sh
index 760d7ee80d5..7b16ebf8e3b 100755
--- a/egs/babel/s5b/local/nnet2/get_egs_semi_supervised.sh
+++ b/egs/babel/s5b/local/nnet2/get_egs_semi_supervised.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5b/local/prepare_kaldi_lm_from_training_text.sh b/egs/babel/s5b/local/prepare_kaldi_lm_from_training_text.sh
index 6566860a5d6..9d48391cc38 100755
--- a/egs/babel/s5b/local/prepare_kaldi_lm_from_training_text.sh
+++ b/egs/babel/s5b/local/prepare_kaldi_lm_from_training_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script was copied from ../10hSystem/local (Author: Guoguo Chen?)
# It will be modified to make it somewhat more reusable
diff --git a/egs/babel/s5b/local/resegment/generate_segments.sh b/egs/babel/s5b/local/resegment/generate_segments.sh
index 01917c3d4e9..1ca8bd9aa06 100755
--- a/egs/babel/s5b/local/resegment/generate_segments.sh
+++ b/egs/babel/s5b/local/resegment/generate_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/resegment/train_segmentation.sh b/egs/babel/s5b/local/resegment/train_segmentation.sh
index 511c451993e..264a1cc3c88 100755
--- a/egs/babel/s5b/local/resegment/train_segmentation.sh
+++ b/egs/babel/s5b/local/resegment/train_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/score_combine.sh b/egs/babel/s5b/local/score_combine.sh
index f425b5afc68..b6c3b613509 100755
--- a/egs/babel/s5b/local/score_combine.sh
+++ b/egs/babel/s5b/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Arnab Ghoshal
# Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
diff --git a/egs/babel/s5b/local/score_map.sh b/egs/babel/s5b/local/score_map.sh
index 94c31acc348..c44cc19c94f 100755
--- a/egs/babel/s5b/local/score_map.sh
+++ b/egs/babel/s5b/local/score_map.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/babel/s5b/local/score_mbr.sh b/egs/babel/s5b/local/score_mbr.sh
index 1c39830b4c7..b99568668a1 100755
--- a/egs/babel/s5b/local/score_mbr.sh
+++ b/egs/babel/s5b/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/babel/s5b/local/score_sctk_prune.sh b/egs/babel/s5b/local/score_sctk_prune.sh
index a6eca9fd071..5ad8e175fd9 100755
--- a/egs/babel/s5b/local/score_sctk_prune.sh
+++ b/egs/babel/s5b/local/score_sctk_prune.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur) 2012-2013. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5b/local/score_stm.sh b/egs/babel/s5b/local/score_stm.sh
index 2406af4e726..40c082f050f 100755
--- a/egs/babel/s5b/local/score_stm.sh
+++ b/egs/babel/s5b/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5b/local/shadow_set_kws_search.sh b/egs/babel/s5b/local/shadow_set_kws_search.sh
index 76521fda9b6..d8b62418cdf 100755
--- a/egs/babel/s5b/local/shadow_set_kws_search.sh
+++ b/egs/babel/s5b/local/shadow_set_kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/show_lattice.sh b/egs/babel/s5b/local/show_lattice.sh
index 0865d0d1225..df13d3091f3 100755
--- a/egs/babel/s5b/local/show_lattice.sh
+++ b/egs/babel/s5b/local/show_lattice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh
diff --git a/egs/babel/s5b/local/train_g2p.sh b/egs/babel/s5b/local/train_g2p.sh
index 385c474abad..d26617025b8 100755
--- a/egs/babel/s5b/local/train_g2p.sh
+++ b/egs/babel/s5b/local/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/train_lms_srilm.sh b/egs/babel/s5b/local/train_lms_srilm.sh
index 5bb1bfaa760..f9f13bb344b 100755
--- a/egs/babel/s5b/local/train_lms_srilm.sh
+++ b/egs/babel/s5b/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/babel/s5b/local/train_mmi_sgmm2.sh b/egs/babel/s5b/local/train_mmi_sgmm2.sh
index 2d3d0b5bf49..e8cc4132f5c 100755
--- a/egs/babel/s5b/local/train_mmi_sgmm2.sh
+++ b/egs/babel/s5b/local/train_mmi_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# MMI training (or optionally boosted MMI, if you give the --boost option),
diff --git a/egs/babel/s5b/run-1-main-extend-lex.sh b/egs/babel/s5b/run-1-main-extend-lex.sh
index ccc62441c1f..3bb077490c9 100755
--- a/egs/babel/s5b/run-1-main-extend-lex.sh
+++ b/egs/babel/s5b/run-1-main-extend-lex.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Parameters for extended lexicon.
extend_lexicon=true
diff --git a/egs/babel/s5b/run-1-main.sh b/egs/babel/s5b/run-1-main.sh
index 6f2e8d444be..dc4983c032e 100755
--- a/egs/babel/s5b/run-1-main.sh
+++ b/egs/babel/s5b/run-1-main.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5b/run-2-segmentation.sh b/egs/babel/s5b/run-2-segmentation.sh
index d832a9421c8..f7651c2ae91 100755
--- a/egs/babel/s5b/run-2-segmentation.sh
+++ b/egs/babel/s5b/run-2-segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/run-2a-nnet-cpu.sh b/egs/babel/s5b/run-2a-nnet-cpu.sh
index 35e7d3ceab3..0f246d9e1aa 100755
--- a/egs/babel/s5b/run-2a-nnet-cpu.sh
+++ b/egs/babel/s5b/run-2a-nnet-cpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5b/run-2a-nnet-ensemble-gpu.sh b/egs/babel/s5b/run-2a-nnet-ensemble-gpu.sh
index 06c9a330295..953ee4baef5 100755
--- a/egs/babel/s5b/run-2a-nnet-ensemble-gpu.sh
+++ b/egs/babel/s5b/run-2a-nnet-ensemble-gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5b/run-2a-nnet-gpu-realign.sh b/egs/babel/s5b/run-2a-nnet-gpu-realign.sh
index 4652789fb2d..963e0c4a422 100755
--- a/egs/babel/s5b/run-2a-nnet-gpu-realign.sh
+++ b/egs/babel/s5b/run-2a-nnet-gpu-realign.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
dir=exp/tri6_nnet
train_stage=-10
realign_epochs="6 10"
diff --git a/egs/babel/s5b/run-2a-nnet-gpu.sh b/egs/babel/s5b/run-2a-nnet-gpu.sh
index 87faa58f733..76dd415667b 100755
--- a/egs/babel/s5b/run-2a-nnet-gpu.sh
+++ b/egs/babel/s5b/run-2a-nnet-gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
dir=exp/tri6_nnet
train_stage=-10
diff --git a/egs/babel/s5b/run-2a-nnet-mpe.sh b/egs/babel/s5b/run-2a-nnet-mpe.sh
index 457d85e4a48..49f1b11a493 100755
--- a/egs/babel/s5b/run-2a-nnet-mpe.sh
+++ b/egs/babel/s5b/run-2a-nnet-mpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5b/run-2b-bnf.sh b/egs/babel/s5b/run-2b-bnf.sh
index 505e647cf93..a30a73ebdb8 100755
--- a/egs/babel/s5b/run-2b-bnf.sh
+++ b/egs/babel/s5b/run-2b-bnf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# Apache 2.0
diff --git a/egs/babel/s5b/run-3b-bnf-nnet.sh b/egs/babel/s5b/run-3b-bnf-nnet.sh
index 169eec6f62f..fff73266305 100755
--- a/egs/babel/s5b/run-3b-bnf-nnet.sh
+++ b/egs/babel/s5b/run-3b-bnf-nnet.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# 2014 Johns Hopkins (Yenda Trmal)
diff --git a/egs/babel/s5b/run-3b-bnf-sgmm.sh b/egs/babel/s5b/run-3b-bnf-sgmm.sh
index 81e4ae538b4..96fcd396893 100755
--- a/egs/babel/s5b/run-3b-bnf-sgmm.sh
+++ b/egs/babel/s5b/run-3b-bnf-sgmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# 2014 Johns Hopkins (Yenda Trmal)
diff --git a/egs/babel/s5b/run-6-combine.sh b/egs/babel/s5b/run-6-combine.sh
index 7998ade9cfd..9ee8b6274f8 100755
--- a/egs/babel/s5b/run-6-combine.sh
+++ b/egs/babel/s5b/run-6-combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
source_sys=shadow.seg
master_sys=dev10h.seg
diff --git a/egs/babel/s5c/local/CHECKPOINT.sh b/egs/babel/s5c/local/CHECKPOINT.sh
index ed0ddd18399..b7897a67ad6 100755
--- a/egs/babel/s5c/local/CHECKPOINT.sh
+++ b/egs/babel/s5c/local/CHECKPOINT.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
function GETAPPROVAL {
until false ; do
diff --git a/egs/babel/s5c/local/ali_to_rttm.sh b/egs/babel/s5c/local/ali_to_rttm.sh
index ef11f516ea3..96cf92b8dc8 100755
--- a/egs/babel/s5c/local/ali_to_rttm.sh
+++ b/egs/babel/s5c/local/ali_to_rttm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/apply_g2p.sh b/egs/babel/s5c/local/apply_g2p.sh
index 385b1f3536e..8807a93b762 100755
--- a/egs/babel/s5c/local/apply_g2p.sh
+++ b/egs/babel/s5c/local/apply_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/arpa2G.sh b/egs/babel/s5c/local/arpa2G.sh
index db816abc7a5..7d427314dd6 100755
--- a/egs/babel/s5c/local/arpa2G.sh
+++ b/egs/babel/s5c/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/best_path_weights.sh b/egs/babel/s5c/local/best_path_weights.sh
index 52782ee3655..5a308a13f85 100755
--- a/egs/babel/s5c/local/best_path_weights.sh
+++ b/egs/babel/s5c/local/best_path_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5c/local/buildSRILM.sh b/egs/babel/s5c/local/buildSRILM.sh
index 0633789a6a9..f113c322444 100755
--- a/egs/babel/s5c/local/buildSRILM.sh
+++ b/egs/babel/s5c/local/buildSRILM.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
targetDir=$1
diff --git a/egs/babel/s5c/local/check_models.sh b/egs/babel/s5c/local/check_models.sh
index 88b3dacc94b..a8ed6247bd4 100755
--- a/egs/babel/s5c/local/check_models.sh
+++ b/egs/babel/s5c/local/check_models.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
check_model () {
diff --git a/egs/babel/s5c/local/check_wers.sh b/egs/babel/s5c/local/check_wers.sh
index 10e1a89ee3a..f3d2483213b 100755
--- a/egs/babel/s5c/local/check_wers.sh
+++ b/egs/babel/s5c/local/check_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
diff --git a/egs/babel/s5c/local/create_shadow_dataset.sh b/egs/babel/s5c/local/create_shadow_dataset.sh
index 49467ed28c1..7e1c675e539 100755
--- a/egs/babel/s5c/local/create_shadow_dataset.sh
+++ b/egs/babel/s5c/local/create_shadow_dataset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University
# Apache 2.0.
diff --git a/egs/babel/s5c/local/extend_lexicon.sh b/egs/babel/s5c/local/extend_lexicon.sh
index 74a0c6a5569..58f1ebc145e 100755
--- a/egs/babel/s5c/local/extend_lexicon.sh
+++ b/egs/babel/s5c/local/extend_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (authors: Daniel Povey, Yenda Trmal)
# 2014 Guoguo Chen
diff --git a/egs/babel/s5c/local/generate_confusion_matrix.sh b/egs/babel/s5c/local/generate_confusion_matrix.sh
index 6296d7486ce..7a3b059efba 100755
--- a/egs/babel/s5c/local/generate_confusion_matrix.sh
+++ b/egs/babel/s5c/local/generate_confusion_matrix.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/generate_example_kws.sh b/egs/babel/s5c/local/generate_example_kws.sh
index e90752926b3..fca383c64af 100755
--- a/egs/babel/s5c/local/generate_example_kws.sh
+++ b/egs/babel/s5c/local/generate_example_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/generate_proxy_keywords.sh b/egs/babel/s5c/local/generate_proxy_keywords.sh
index 584f7d7902e..67ec24b931f 100755
--- a/egs/babel/s5c/local/generate_proxy_keywords.sh
+++ b/egs/babel/s5c/local/generate_proxy_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Guoguo Chen
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_combine.sh b/egs/babel/s5c/local/kws_combine.sh
index f795c63aad9..35d42599b79 100755
--- a/egs/babel/s5c/local/kws_combine.sh
+++ b/egs/babel/s5c/local/kws_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
diff --git a/egs/babel/s5c/local/kws_data_prep.sh b/egs/babel/s5c/local/kws_data_prep.sh
index 3882c99ce6d..442a49eb059 100755
--- a/egs/babel/s5c/local/kws_data_prep.sh
+++ b/egs/babel/s5c/local/kws_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_data_prep_proxy.sh b/egs/babel/s5c/local/kws_data_prep_proxy.sh
index 04cc59b6499..42e330116c9 100755
--- a/egs/babel/s5c/local/kws_data_prep_proxy.sh
+++ b/egs/babel/s5c/local/kws_data_prep_proxy.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Guoguo Chen
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh
index b73112b191d..a1bc1ec7048 100755
--- a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh
+++ b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_oracle.sh b/egs/babel/s5c/local/kws_oracle.sh
index c7aa661664f..2b10c721961 100755
--- a/egs/babel/s5c/local/kws_oracle.sh
+++ b/egs/babel/s5c/local/kws_oracle.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
# 2013 Johns Hopkins University
diff --git a/egs/babel/s5c/local/kws_score_f4de.sh b/egs/babel/s5c/local/kws_score_f4de.sh
index cd6948a8a08..e5acfbd9ee8 100755
--- a/egs/babel/s5c/local/kws_score_f4de.sh
+++ b/egs/babel/s5c/local/kws_score_f4de.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_search.sh b/egs/babel/s5c/local/kws_search.sh
index 9e998d6c3f9..c2ee7671508 100755
--- a/egs/babel/s5c/local/kws_search.sh
+++ b/egs/babel/s5c/local/kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_setup.sh b/egs/babel/s5c/local/kws_setup.sh
index a6b87ef004f..6a77953f181 100755
--- a/egs/babel/s5c/local/kws_setup.sh
+++ b/egs/babel/s5c/local/kws_setup.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/lattice_to_ctm.sh b/egs/babel/s5c/local/lattice_to_ctm.sh
index 5fbde42d237..d23c4f17abf 100755
--- a/egs/babel/s5c/local/lattice_to_ctm.sh
+++ b/egs/babel/s5c/local/lattice_to_ctm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5c/local/make_L_align.sh b/egs/babel/s5c/local/make_L_align.sh
index 50e46a00493..9d9d48c6530 100755
--- a/egs/babel/s5c/local/make_L_align.sh
+++ b/egs/babel/s5c/local/make_L_align.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Guoguo Chen, Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/make_corpus_subset.sh b/egs/babel/s5c/local/make_corpus_subset.sh
index add194d48e8..acd5e91a18b 100755
--- a/egs/babel/s5c/local/make_corpus_subset.sh
+++ b/egs/babel/s5c/local/make_corpus_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/make_ecf_subset.sh b/egs/babel/s5c/local/make_ecf_subset.sh
index 9bdd95c3e27..9fe8df841b7 100755
--- a/egs/babel/s5c/local/make_ecf_subset.sh
+++ b/egs/babel/s5c/local/make_ecf_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/make_lexicon_subset.sh b/egs/babel/s5c/local/make_lexicon_subset.sh
index bf2ebe45f7d..b6f19c088e9 100755
--- a/egs/babel/s5c/local/make_lexicon_subset.sh
+++ b/egs/babel/s5c/local/make_lexicon_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "$0 $@" # Print the command line for logging
diff --git a/egs/babel/s5c/local/nist_eval/create_compound_set.sh b/egs/babel/s5c/local/nist_eval/create_compound_set.sh
index ae5492a9f9e..7450396aa89 100755
--- a/egs/babel/s5c/local/nist_eval/create_compound_set.sh
+++ b/egs/babel/s5c/local/nist_eval/create_compound_set.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#Simple script to create compound set info that will allow for more automatized
#work with the shadow set.
diff --git a/egs/babel/s5c/local/nist_eval/export_systems.sh b/egs/babel/s5c/local/nist_eval/export_systems.sh
index d0af608416c..f0984c703ab 100755
--- a/egs/babel/s5c/local/nist_eval/export_systems.sh
+++ b/egs/babel/s5c/local/nist_eval/export_systems.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel/s5c/local/nist_eval/make_release.sh b/egs/babel/s5c/local/nist_eval/make_release.sh
index 179d5cbe619..0e4e1f2cb15 100755
--- a/egs/babel/s5c/local/nist_eval/make_release.sh
+++ b/egs/babel/s5c/local/nist_eval/make_release.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
team=RADICAL
corpusid=
diff --git a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh
index 3b12222e13a..968b8b36c1e 100755
--- a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh
+++ b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5c/local/prepare_kaldi_lm_from_training_text.sh b/egs/babel/s5c/local/prepare_kaldi_lm_from_training_text.sh
index 6566860a5d6..9d48391cc38 100755
--- a/egs/babel/s5c/local/prepare_kaldi_lm_from_training_text.sh
+++ b/egs/babel/s5c/local/prepare_kaldi_lm_from_training_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script was copied from ../10hSystem/local (Author: Guoguo Chen?)
# It will be modified to make it somewhat more reusable
diff --git a/egs/babel/s5c/local/resegment/generate_segments.sh b/egs/babel/s5c/local/resegment/generate_segments.sh
index 95e88deb87d..33eb5b36848 100755
--- a/egs/babel/s5c/local/resegment/generate_segments.sh
+++ b/egs/babel/s5c/local/resegment/generate_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/resegment/train_segmentation.sh b/egs/babel/s5c/local/resegment/train_segmentation.sh
index 511c451993e..264a1cc3c88 100755
--- a/egs/babel/s5c/local/resegment/train_segmentation.sh
+++ b/egs/babel/s5c/local/resegment/train_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/run_kws_stt_task.sh b/egs/babel/s5c/local/run_kws_stt_task.sh
index d622aac9442..d21aeabc2e3 100755
--- a/egs/babel/s5c/local/run_kws_stt_task.sh
+++ b/egs/babel/s5c/local/run_kws_stt_task.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/score_combine.sh b/egs/babel/s5c/local/score_combine.sh
index 7e8af85b2d8..eb9f8412089 100755
--- a/egs/babel/s5c/local/score_combine.sh
+++ b/egs/babel/s5c/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Arnab Ghoshal
# Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
diff --git a/egs/babel/s5c/local/score_map.sh b/egs/babel/s5c/local/score_map.sh
index 94c31acc348..c44cc19c94f 100755
--- a/egs/babel/s5c/local/score_map.sh
+++ b/egs/babel/s5c/local/score_map.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/babel/s5c/local/score_mbr.sh b/egs/babel/s5c/local/score_mbr.sh
index a86dd5c3f71..66b825f8d1d 100755
--- a/egs/babel/s5c/local/score_mbr.sh
+++ b/egs/babel/s5c/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/babel/s5c/local/score_sctk_prune.sh b/egs/babel/s5c/local/score_sctk_prune.sh
index 09662af57c8..b7787950744 100755
--- a/egs/babel/s5c/local/score_sctk_prune.sh
+++ b/egs/babel/s5c/local/score_sctk_prune.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur) 2012-2013. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5c/local/score_stm.sh b/egs/babel/s5c/local/score_stm.sh
index 56835109722..b86d83570e2 100755
--- a/egs/babel/s5c/local/score_stm.sh
+++ b/egs/babel/s5c/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/shadow_set_kws_search.sh b/egs/babel/s5c/local/shadow_set_kws_search.sh
index a67a3a57f6a..b2765928cbc 100755
--- a/egs/babel/s5c/local/shadow_set_kws_search.sh
+++ b/egs/babel/s5c/local/shadow_set_kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/show_lattice.sh b/egs/babel/s5c/local/show_lattice.sh
index 3435fcb8c41..3373b66f666 100755
--- a/egs/babel/s5c/local/show_lattice.sh
+++ b/egs/babel/s5c/local/show_lattice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh
diff --git a/egs/babel/s5c/local/split_ctms.sh b/egs/babel/s5c/local/split_ctms.sh
index b24a1380111..84d8811fc7b 100755
--- a/egs/babel/s5c/local/split_ctms.sh
+++ b/egs/babel/s5c/local/split_ctms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/syllab/ali_to_syllabs.sh b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh
index 8f0cb88771a..84b7d24eeb3 100755
--- a/egs/babel/s5c/local/syllab/ali_to_syllabs.sh
+++ b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5c/local/syllab/generate_syllable_lang.sh b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh
index 4a0810b9415..26486a08487 100755
--- a/egs/babel/s5c/local/syllab/generate_syllable_lang.sh
+++ b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5c/local/train_g2p.sh b/egs/babel/s5c/local/train_g2p.sh
index 08be0014656..5a0594d8f46 100755
--- a/egs/babel/s5c/local/train_g2p.sh
+++ b/egs/babel/s5c/local/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/train_lms_srilm.sh b/egs/babel/s5c/local/train_lms_srilm.sh
index be2b0247aeb..ea8e5840be5 100755
--- a/egs/babel/s5c/local/train_lms_srilm.sh
+++ b/egs/babel/s5c/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/babel/s5c/local/train_mmi_sgmm2.sh b/egs/babel/s5c/local/train_mmi_sgmm2.sh
index cdf9e28b1bf..210269bf23e 100755
--- a/egs/babel/s5c/local/train_mmi_sgmm2.sh
+++ b/egs/babel/s5c/local/train_mmi_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# MMI training (or optionally boosted MMI, if you give the --boost option),
diff --git a/egs/babel/s5c/run-1-main-extend-lex.sh b/egs/babel/s5c/run-1-main-extend-lex.sh
index ccc62441c1f..3bb077490c9 100755
--- a/egs/babel/s5c/run-1-main-extend-lex.sh
+++ b/egs/babel/s5c/run-1-main-extend-lex.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Parameters for extended lexicon.
extend_lexicon=true
diff --git a/egs/babel/s5c/run-1-main.sh b/egs/babel/s5c/run-1-main.sh
index 61c875fc84e..f5741d4ce8a 100755
--- a/egs/babel/s5c/run-1-main.sh
+++ b/egs/babel/s5c/run-1-main.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5c/run-2-segmentation.sh b/egs/babel/s5c/run-2-segmentation.sh
index d832a9421c8..f7651c2ae91 100755
--- a/egs/babel/s5c/run-2-segmentation.sh
+++ b/egs/babel/s5c/run-2-segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/run-2a-nnet-cpu.sh b/egs/babel/s5c/run-2a-nnet-cpu.sh
index 35e7d3ceab3..0f246d9e1aa 100755
--- a/egs/babel/s5c/run-2a-nnet-cpu.sh
+++ b/egs/babel/s5c/run-2a-nnet-cpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5c/run-2a-nnet-ensemble-gpu.sh b/egs/babel/s5c/run-2a-nnet-ensemble-gpu.sh
index 06c9a330295..953ee4baef5 100755
--- a/egs/babel/s5c/run-2a-nnet-ensemble-gpu.sh
+++ b/egs/babel/s5c/run-2a-nnet-ensemble-gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5c/run-2a-nnet-gpu.sh b/egs/babel/s5c/run-2a-nnet-gpu.sh
index 87faa58f733..76dd415667b 100755
--- a/egs/babel/s5c/run-2a-nnet-gpu.sh
+++ b/egs/babel/s5c/run-2a-nnet-gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
dir=exp/tri6_nnet
train_stage=-10
diff --git a/egs/babel/s5c/run-2a-nnet-mpe.sh b/egs/babel/s5c/run-2a-nnet-mpe.sh
index 457d85e4a48..49f1b11a493 100755
--- a/egs/babel/s5c/run-2a-nnet-mpe.sh
+++ b/egs/babel/s5c/run-2a-nnet-mpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5c/run-2b-bnf.sh b/egs/babel/s5c/run-2b-bnf.sh
index 505e647cf93..a30a73ebdb8 100755
--- a/egs/babel/s5c/run-2b-bnf.sh
+++ b/egs/babel/s5c/run-2b-bnf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# Apache 2.0
diff --git a/egs/babel/s5c/run-3b-bnf-nnet.sh b/egs/babel/s5c/run-3b-bnf-nnet.sh
index 169eec6f62f..fff73266305 100755
--- a/egs/babel/s5c/run-3b-bnf-nnet.sh
+++ b/egs/babel/s5c/run-3b-bnf-nnet.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# 2014 Johns Hopkins (Yenda Trmal)
diff --git a/egs/babel/s5c/run-3b-bnf-sgmm.sh b/egs/babel/s5c/run-3b-bnf-sgmm.sh
index 81e4ae538b4..96fcd396893 100755
--- a/egs/babel/s5c/run-3b-bnf-sgmm.sh
+++ b/egs/babel/s5c/run-3b-bnf-sgmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# 2014 Johns Hopkins (Yenda Trmal)
diff --git a/egs/babel/s5c/run-4-anydecode.sh b/egs/babel/s5c/run-4-anydecode.sh
index 56b7836683f..50300a186fb 100755
--- a/egs/babel/s5c/run-4-anydecode.sh
+++ b/egs/babel/s5c/run-4-anydecode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel/s5c/run-6-combine.sh b/egs/babel/s5c/run-6-combine.sh
index 81dc42caca3..d868c4dcefe 100755
--- a/egs/babel/s5c/run-6-combine.sh
+++ b/egs/babel/s5c/run-6-combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
diff --git a/egs/babel/s5d/local/add_to_multilang.sh b/egs/babel/s5d/local/add_to_multilang.sh
index 162d8ffa709..dbf159a5eda 100755
--- a/egs/babel/s5d/local/add_to_multilang.sh
+++ b/egs/babel/s5d/local/add_to_multilang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/ali_to_rttm.sh b/egs/babel/s5d/local/ali_to_rttm.sh
index cb4f0740130..217758dfea4 100755
--- a/egs/babel/s5d/local/ali_to_rttm.sh
+++ b/egs/babel/s5d/local/ali_to_rttm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5d/local/apply_g2p.sh b/egs/babel/s5d/local/apply_g2p.sh
index 385b1f3536e..8807a93b762 100755
--- a/egs/babel/s5d/local/apply_g2p.sh
+++ b/egs/babel/s5d/local/apply_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/local/arpa2G.sh b/egs/babel/s5d/local/arpa2G.sh
index 887b393b459..f3806d010a7 100755
--- a/egs/babel/s5d/local/arpa2G.sh
+++ b/egs/babel/s5d/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5d/local/best_scores.sh b/egs/babel/s5d/local/best_scores.sh
index 33bcdb07183..5e6cd2dd512 100755
--- a/egs/babel/s5d/local/best_scores.sh
+++ b/egs/babel/s5d/local/best_scores.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/best_scores_kws.sh b/egs/babel/s5d/local/best_scores_kws.sh
index 164ec8dc4b0..a50509ae1e5 100755
--- a/egs/babel/s5d/local/best_scores_kws.sh
+++ b/egs/babel/s5d/local/best_scores_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/chain/run_ivector_common.sh b/egs/babel/s5d/local/chain/run_ivector_common.sh
index a1a145564d0..1cf19bd5351 100755
--- a/egs/babel/s5d/local/chain/run_ivector_common.sh
+++ b/egs/babel/s5d/local/chain/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -eu -o pipefail
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn.sh
index 7b4535f8c5e..f753e9f3bf2 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh
index 5fc14dda826..c64f4b78edd 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh
index 8c7de5d18d4..91938b9ce99 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh
index 0b3e70b5a04..031af6b468b 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh
index 45f2907645e..2149c9eb263 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh
index 0d92aff5c28..9ed6631ec09 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh
index 4129c00dcb4..6e1f496944d 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh
index 1cfa50c1aa1..e352e6aa5cd 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh
index ba8ac1e0373..eda24b245b3 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh
index 5de285e080e..c65a8eb58ca 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain2/run_tdnn.sh b/egs/babel/s5d/local/chain2/run_tdnn.sh
new file mode 100755
index 00000000000..3abcee589cd
--- /dev/null
+++ b/egs/babel/s5d/local/chain2/run_tdnn.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+# Copyright 2020 Idiap Research Institute (Srikanth Madikeri)
+# chain2 recipe for monolingual systems for BABEL
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=-1
+nj=30
+train_set=train
+gmm=tri5 # the gmm for the target data
+langdir=data/lang
+num_threads_ubm=1
+nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir= # you can set this to use previously dumped egs.
+chunk_width=150,120,90,75
+frame_subsampling_factor=3
+langs=default # has multiple values for a multilingual system
+srand=-1
+num_jobs_initial=2
+num_jobs_final=12
+initial_effective_lrate=0.001
+final_effective_lrate=0.0001
+max_param_change=2.0
+xent_regularize=0.1
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat <data/lang_chain/topo
+ fi
+fi
+
+if [ $stage -le 8 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \
+ $langdir $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 9 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor $frame_subsampling_factor \
+ --context-opts "--context-width=2 --central-position=1" \
+ --leftmost-questions-truncate -1 \
+ --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+if [ $stage -le 10 ]; then
+ mkdir -p $dir
+
+ echo "$0: creating neural net configs using the xconfig parser";
+
+ num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+ [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; }
+ learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
+
+ mkdir -p $dir/configs
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=43 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-layer name=tdnn1 dim=450
+ relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=450
+ relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn7 input=Append(-6,-3,0) dim=450
+
+ ## adding the layers for chain branch
+ relu-batchnorm-layer name=prefinal-chain input=tdnn7 dim=450 target-rms=0.5
+ output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5
+ output-layer name=output-default input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5
+
+ ## adding the layers for chain branch
+
+ # adding the layers for xent branch
+ # This block prints the configs for a separate output that will be
+ # trained with a cross-entropy objective in the 'chain' models... this
+ # has the effect of regularizing the hidden parts of the model. we use
+ # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+ # 0.5 / args.xent_regularize is suitable as it means the xent
+ # final-layer learns at a rate independent of the regularization
+ # constant; and the 0.5 was tuned so as to make the relative progress
+ # similar in the xent and regular final layers.
+ relu-batchnorm-layer name=prefinal-xent input=tdnn7 dim=450 target-rms=0.5
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+ output-layer name=output-default-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+ if [ ! -f $dir/init/default_trans.mdl ]; then # checking this because it may have been copied in a previous run of the same script
+ copy-transition-model $tree_dir/final.mdl $dir/init/default_trans.mdl || exit 1 &
+ else
+ echo "Keeping the old $dir/init/default_trans.mdl as it already exists."
+ fi
+
+fi
+
+init_info=$dir/init/info.txt
+if [ $stage -le 11 ]; then
+
+ if [ ! -f $dir/configs/ref.raw ]; then
+ echo "Expected $dir/configs/ref.raw to exist"
+ exit
+ fi
+
+ mkdir -p $dir/init
+ nnet3-info $dir/configs/ref.raw > $dir/configs/temp.info
+ model_left_context=`fgrep 'left-context' $dir/configs/temp.info | awk '{print $2}'`
+ model_right_context=`fgrep 'right-context' $dir/configs/temp.info | awk '{print $2}'`
+ cat >$init_info < )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
index a4f301e2c14..4168604fce7 100755
--- a/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
+++ b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/nist_eval/export_systems.sh b/egs/babel/s5d/local/nist_eval/export_systems.sh
index d0af608416c..f0984c703ab 100755
--- a/egs/babel/s5d/local/nist_eval/export_systems.sh
+++ b/egs/babel/s5d/local/nist_eval/export_systems.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel/s5d/local/nist_eval/make_release.sh b/egs/babel/s5d/local/nist_eval/make_release.sh
index 179d5cbe619..0e4e1f2cb15 100755
--- a/egs/babel/s5d/local/nist_eval/make_release.sh
+++ b/egs/babel/s5d/local/nist_eval/make_release.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
team=RADICAL
corpusid=
diff --git a/egs/babel/s5d/local/nist_eval/split_compound_set.sh b/egs/babel/s5d/local/nist_eval/split_compound_set.sh
index 59ea4c162d7..3c236430b73 100755
--- a/egs/babel/s5d/local/nist_eval/split_compound_set.sh
+++ b/egs/babel/s5d/local/nist_eval/split_compound_set.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
index 3b12222e13a..968b8b36c1e 100755
--- a/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
+++ b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5d/local/prepare_extended_lexicon.sh b/egs/babel/s5d/local/prepare_extended_lexicon.sh
index 3cc5ca6c21f..7e8ca02529e 100755
--- a/egs/babel/s5d/local/prepare_extended_lexicon.sh
+++ b/egs/babel/s5d/local/prepare_extended_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/reestimate_langp.sh b/egs/babel/s5d/local/reestimate_langp.sh
index ae70b6a8f46..b31438fca57 100755
--- a/egs/babel/s5d/local/reestimate_langp.sh
+++ b/egs/babel/s5d/local/reestimate_langp.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/resegment/generate_segments.sh b/egs/babel/s5d/local/resegment/generate_segments.sh
index 95e88deb87d..33eb5b36848 100755
--- a/egs/babel/s5d/local/resegment/generate_segments.sh
+++ b/egs/babel/s5d/local/resegment/generate_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/local/resegment/train_segmentation.sh b/egs/babel/s5d/local/resegment/train_segmentation.sh
index 511c451993e..264a1cc3c88 100755
--- a/egs/babel/s5d/local/resegment/train_segmentation.sh
+++ b/egs/babel/s5d/local/resegment/train_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/local/run_asr_segmentation.sh b/egs/babel/s5d/local/run_asr_segmentation.sh
index f70775526b6..025f645dcf3 100755
--- a/egs/babel/s5d/local/run_asr_segmentation.sh
+++ b/egs/babel/s5d/local/run_asr_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/babel/s5d/local/run_cleanup_segmentation.sh b/egs/babel/s5d/local/run_cleanup_segmentation.sh
index 324d796b1b1..5a320ece5cf 100755
--- a/egs/babel/s5d/local/run_cleanup_segmentation.sh
+++ b/egs/babel/s5d/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/babel/s5d/local/run_kws_stt_task.sh b/egs/babel/s5d/local/run_kws_stt_task.sh
index e2f719bde9f..447adf7ac3f 100755
--- a/egs/babel/s5d/local/run_kws_stt_task.sh
+++ b/egs/babel/s5d/local/run_kws_stt_task.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5d/local/run_kws_stt_task2.sh b/egs/babel/s5d/local/run_kws_stt_task2.sh
index 73c4e730ab5..35efefb775e 100755
--- a/egs/babel/s5d/local/run_kws_stt_task2.sh
+++ b/egs/babel/s5d/local/run_kws_stt_task2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5d/local/score_combine.sh b/egs/babel/s5d/local/score_combine.sh
index 7e8af85b2d8..eb9f8412089 100755
--- a/egs/babel/s5d/local/score_combine.sh
+++ b/egs/babel/s5d/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Arnab Ghoshal
# Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
diff --git a/egs/babel/s5d/local/score_stm.sh b/egs/babel/s5d/local/score_stm.sh
index 56835109722..b86d83570e2 100755
--- a/egs/babel/s5d/local/score_stm.sh
+++ b/egs/babel/s5d/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5d/local/search/combine.sh b/egs/babel/s5d/local/search/combine.sh
index 4f77c0f0f7c..612c55a3a3d 100755
--- a/egs/babel/s5d/local/search/combine.sh
+++ b/egs/babel/s5d/local/search/combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/combine_special.sh b/egs/babel/s5d/local/search/combine_special.sh
index 5802f49be06..0c1fbfe1c43 100755
--- a/egs/babel/s5d/local/search/combine_special.sh
+++ b/egs/babel/s5d/local/search/combine_special.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/compile_keywords.sh b/egs/babel/s5d/local/search/compile_keywords.sh
index 92dc4220a8e..39cbb299a8e 100755
--- a/egs/babel/s5d/local/search/compile_keywords.sh
+++ b/egs/babel/s5d/local/search/compile_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/compile_proxy_keywords.sh b/egs/babel/s5d/local/search/compile_proxy_keywords.sh
index 33d8dd52938..873a61b7b23 100755
--- a/egs/babel/s5d/local/search/compile_proxy_keywords.sh
+++ b/egs/babel/s5d/local/search/compile_proxy_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# 2012-2014 Guoguo Chen
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/normalize.sh b/egs/babel/s5d/local/search/normalize.sh
index 38054f75879..4b56fb97a05 100755
--- a/egs/babel/s5d/local/search/normalize.sh
+++ b/egs/babel/s5d/local/search/normalize.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5d/local/search/rttm_to_hitlists.sh b/egs/babel/s5d/local/search/rttm_to_hitlists.sh
index c6a8d2b5ad0..27bbbdd6fe0 100755
--- a/egs/babel/s5d/local/search/rttm_to_hitlists.sh
+++ b/egs/babel/s5d/local/search/rttm_to_hitlists.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/run_phn_search.sh b/egs/babel/s5d/local/search/run_phn_search.sh
index 3d39f55efa7..e03fe0dfe84 100755
--- a/egs/babel/s5d/local/search/run_phn_search.sh
+++ b/egs/babel/s5d/local/search/run_phn_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/run_search.sh b/egs/babel/s5d/local/search/run_search.sh
index 1fbdb071123..b341062f3cf 100755
--- a/egs/babel/s5d/local/search/run_search.sh
+++ b/egs/babel/s5d/local/search/run_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/run_syll_search.sh b/egs/babel/s5d/local/search/run_syll_search.sh
index 0694414b6b6..cce8885c73d 100755
--- a/egs/babel/s5d/local/search/run_syll_search.sh
+++ b/egs/babel/s5d/local/search/run_syll_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/score.sh b/egs/babel/s5d/local/search/score.sh
index e429b1da030..bf423648bd8 100755
--- a/egs/babel/s5d/local/search/score.sh
+++ b/egs/babel/s5d/local/search/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5d/local/search/search.sh b/egs/babel/s5d/local/search/search.sh
index 854719b6d24..3da7c9edc46 100755
--- a/egs/babel/s5d/local/search/search.sh
+++ b/egs/babel/s5d/local/search/search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/setup.sh b/egs/babel/s5d/local/search/setup.sh
index d4e2013a443..03d5cdc4f37 100755
--- a/egs/babel/s5d/local/search/setup.sh
+++ b/egs/babel/s5d/local/search/setup.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search_index.sh b/egs/babel/s5d/local/search_index.sh
index 9e7cdb77f3d..6f0c6b46fb5 100755
--- a/egs/babel/s5d/local/search_index.sh
+++ b/egs/babel/s5d/local/search_index.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0
diff --git a/egs/babel/s5d/local/segmentation/tuning/train_lstm_asr_sad_1a.sh b/egs/babel/s5d/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
index 63f78aa8092..c14560dfed8 100755
--- a/egs/babel/s5d/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
+++ b/egs/babel/s5d/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a script to train a TDNN-LSTM for speech activity detection (SAD)
# using LSTM for long-context information.
diff --git a/egs/babel/s5d/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/babel/s5d/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index 2dfe9a0bb96..d27a60eb4ac 100755
--- a/egs/babel/s5d/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/babel/s5d/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a script to train a TDNN for speech activity detection (SAD)
# using statistics pooling for long-context information.
diff --git a/egs/babel/s5d/local/syllab/ali_to_syllabs.sh b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
index 8f0cb88771a..84b7d24eeb3 100755
--- a/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
+++ b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/create_syll_datadir.sh b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
index 4c014285619..6a95d3945a7 100755
--- a/egs/babel/s5d/local/syllab/create_syll_datadir.sh
+++ b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/generate_phone_lang.sh b/egs/babel/s5d/local/syllab/generate_phone_lang.sh
index 81d8a0acdc7..d9c91405e64 100755
--- a/egs/babel/s5d/local/syllab/generate_phone_lang.sh
+++ b/egs/babel/s5d/local/syllab/generate_phone_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/generate_syllable_lang.sh b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
index a7bd667027c..a375971d432 100755
--- a/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
+++ b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/lattice_word2syll.sh b/egs/babel/s5d/local/syllab/lattice_word2syll.sh
index 63e9114875d..6e20e78ff73 100755
--- a/egs/babel/s5d/local/syllab/lattice_word2syll.sh
+++ b/egs/babel/s5d/local/syllab/lattice_word2syll.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/run_phones.sh b/egs/babel/s5d/local/syllab/run_phones.sh
index 7c4a13c61f9..aea28cd4dd7 100755
--- a/egs/babel/s5d/local/syllab/run_phones.sh
+++ b/egs/babel/s5d/local/syllab/run_phones.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/run_syllabs.sh b/egs/babel/s5d/local/syllab/run_syllabs.sh
index 7366ac9ad35..f9697e86420 100755
--- a/egs/babel/s5d/local/syllab/run_syllabs.sh
+++ b/egs/babel/s5d/local/syllab/run_syllabs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/train_g2p.sh b/egs/babel/s5d/local/train_g2p.sh
index 08be0014656..5a0594d8f46 100755
--- a/egs/babel/s5d/local/train_g2p.sh
+++ b/egs/babel/s5d/local/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/local/train_lms_srilm.sh b/egs/babel/s5d/local/train_lms_srilm.sh
index cf357260d8c..4283461aa47 100755
--- a/egs/babel/s5d/local/train_lms_srilm.sh
+++ b/egs/babel/s5d/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/babel/s5d/run-1-main-extend-lex.sh b/egs/babel/s5d/run-1-main-extend-lex.sh
index 69651416cfb..38e059fed72 100755
--- a/egs/babel/s5d/run-1-main-extend-lex.sh
+++ b/egs/babel/s5d/run-1-main-extend-lex.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Parameters for extended lexicon.
extend_lexicon=true
diff --git a/egs/babel/s5d/run-1-main-unicode-extend-lex.sh b/egs/babel/s5d/run-1-main-unicode-extend-lex.sh
index d7e831febeb..eac1f565468 100755
--- a/egs/babel/s5d/run-1-main-unicode-extend-lex.sh
+++ b/egs/babel/s5d/run-1-main-unicode-extend-lex.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Parameters for extended lexicon.
extend_lexicon=true
diff --git a/egs/babel/s5d/run-1-main-unicode.sh b/egs/babel/s5d/run-1-main-unicode.sh
index b6641433c84..0eefeaed3e5 100755
--- a/egs/babel/s5d/run-1-main-unicode.sh
+++ b/egs/babel/s5d/run-1-main-unicode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5d/run-1-main.sh b/egs/babel/s5d/run-1-main.sh
index 329e8480c54..a72596bd15e 100755
--- a/egs/babel/s5d/run-1-main.sh
+++ b/egs/babel/s5d/run-1-main.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5d/run-2-segmentation.sh b/egs/babel/s5d/run-2-segmentation.sh
index d832a9421c8..f7651c2ae91 100755
--- a/egs/babel/s5d/run-2-segmentation.sh
+++ b/egs/babel/s5d/run-2-segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/run-4-anydecode.sh b/egs/babel/s5d/run-4-anydecode.sh
index 52c997ae26a..2593eb340a8 100755
--- a/egs/babel/s5d/run-4-anydecode.sh
+++ b/egs/babel/s5d/run-4-anydecode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel_multilang/s5/local/chain2/run_tdnn.sh b/egs/babel_multilang/s5/local/chain2/run_tdnn.sh
new file mode 100755
index 00000000000..c0681455bf7
--- /dev/null
+++ b/egs/babel_multilang/s5/local/chain2/run_tdnn.sh
@@ -0,0 +1,447 @@
+#!/bin/bash
+# chain2 recipe for monolingual systems for BABEL
+# Copyright 2016 Pegah Ghahremani
+# Copyright 2020 Srikanth Madikeri (Idiap Research Institute)
+
+# This script is used to train multilingual LF-MMI system with a multi-task training
+# setup.
+
+# local.conf should exists (check README.txt), which contains configs for
+# multilingual training such as lang_list as array of space-separated languages used
+# for multilingual training.
+
+set -e -o pipefail
+
+remove_egs=false
+cmd=queue.pl
+srand=-1
+stage=0
+train_stage=-10
+get_egs_stage=-10
+decode_stage=-10
+
+speed_perturb=true
+use_pitch=true # if true, pitch feature used to train multilingual setup
+use_pitch_ivector=false # if true, pitch feature used in ivector extraction.
+use_ivector=true
+megs_dir=
+alidir=tri5_ali
+stage=-1
+nj=30
+train_set=train
+gmm=tri5 # the gmm for the target data
+langdir=data/lang
+num_threads_ubm=1
+nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
+feat_suffix=_hires
+
+label_delay=5
+frame_subsampling_factor=3
+xent_regularize=0.01
+max_param_change=2.0
+num_jobs_initial=2
+num_jobs_final=12
+initial_effective_lrate=0.001
+final_effective_lrate=0.0001
+num_jobs_initial=2
+num_jobs_final=8
+chunk_width=150
+extra_left_context=50
+extra_right_context=0
+common_egs_dir= # you can set this to use previously dumped egs.
+langconf=local.conf
+
+speed_perturb=true
+global_extractor=exp/multi/nnet3/extractor
+dir=exp/chain2${nnet3_affix}/tdnn${tdnn_affix}_multi
+
+. ./path.sh
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+[ ! -f $langconf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1;
+. $langconf || exit 1;
+
+[ ! -f local.conf ] && echo 'the file local.conf does not exist!' && exit 1;
+. local.conf || exit 1;
+
+suffix=
+if $speed_perturb; then
+ suffix=_sp
+fi
+
+num_langs=${#lang_list[@]}
+echo "$0 $@" # Print the command line for logging
+if ! cuda-compiled; then
+ cat <${multi_lfmmi_lang[$lang_index]}/topo
+ fi
+ done
+fi
+
+if [ $stage -le 9 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ for lang_index in `seq 0 $[$num_langs-1]`;do
+ langdir=${multi_lang[$lang_index]}
+ lores_train_data_dir=${multi_lores_data_dirs[$lang_index]}
+ gmm_dir=${multi_gmm_dir[$lang_index]}
+ lat_dir=${multi_ali_latdirs[$lang_index]}
+
+ steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \
+ $langdir $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+ exit
+ done
+fi
+
+if [ $stage -le 10 ]; then
+ for lang_index in `seq 0 $[$num_langs-1]`;do
+ lang_name=${lang_list[$lang_index]}
+ echo "$0: Building tree for $lang_name"
+
+ tree_dir=${multi_ali_treedirs[$lang_index]}
+ ali_dir=${multi_ali_dirs[$lang_index]}
+ lores_train_data_dir=${multi_lores_data_dirs[$lang_index]}
+ lang_dir=${multi_lfmmi_lang[$lang_index]}
+ if [ -f $tree_dir/final.mdl -a -f $tree_dir/tree ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ continue
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor $frame_subsampling_factor \
+ --context-opts "--context-width=2 --central-position=1" \
+ --leftmost-questions-truncate -1 \
+ --cmd "$train_cmd" 4000 ${lores_train_data_dir} $lang_dir $ali_dir $tree_dir
+ done
+fi
+
+if [ $stage -le 11 ]; then
+ echo "$0: creating multilingual neural net configs using the xconfig parser";
+ if [ -z $bnf_dim ]; then
+ bnf_dim=80
+ fi
+ mkdir -p $dir/configs
+ ivector_node_xconfig=""
+ ivector_to_append=""
+ if $use_ivector; then
+ ivector_node_xconfig="input dim=$ivector_dim name=ivector"
+ ivector_to_append=", ReplaceIndex(ivector, t, 0)"
+ fi
+ learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
+ dummy_tree_dir=${multi_ali_treedirs[0]}
+ num_targets=`tree-info $dummy_tree_dir/tree 2>/dev/null | grep num-pdfs | awk '{print $2}'` || exit 1;
+ cat < $dir/configs/network.xconfig
+ input dim=$feat_dim name=input
+ $ivector_node_xconfig
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-layer name=tdnn1 input=Append(input@-2,input@-1,input,input@1,input@2$ivector_to_append) dim=450
+ relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=450
+ relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn7 input=Append(-6,-3,0) dim=450
+ #relu-batchnorm-layer name=tdnn_bn dim=$bnf_dim
+ # adding the layers for diffrent language's output
+ # dummy output node
+ output-layer name=output dim=$num_targets max-change=1.5 include-log-softmax=false
+ output-layer name=output-xent input=tdnn7 dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+EOF
+ # added separate outptut layer and softmax for all languages.
+ for lang_index in `seq 0 $[$num_langs-1]`;do
+ tree_dir=${multi_ali_treedirs[$lang_index]}
+ num_targets=`tree-info $tree_dir/tree 2>/dev/null | grep num-pdfs | awk '{print $2}'` || exit 1;
+
+ lang_name=${lang_list[${lang_index}]}
+ #echo "relu-renorm-layer name=prefinal-affine-lang-${lang_name} input=tdnn7 dim=450 target-rms=0.5"
+ echo "output-layer name=output-${lang_name} dim=$num_targets input=tdnn7 max-change=1.5 include-log-softmax=false"
+ echo "output-layer name=output-${lang_name}-xent input=tdnn7 dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5"
+ done >> $dir/configs/network.xconfig
+
+ lang_name=${lang_list[0]}
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+ --config-dir $dir/configs/
+fi
+
+init_info=$dir/init/info.txt
+if [ $stage -le 12 ]; then
+ if [ ! -f $dir/configs/ref.raw ]; then
+ echo "Expected $dir/configs/ref.raw to exist"
+ exit
+ fi
+ mkdir -p $dir/init
+ nnet3-info $dir/configs/ref.raw > $dir/configs/temp.info
+ model_left_context=`fgrep 'left-context' $dir/configs/temp.info | awk '{print $2}'`
+ model_right_context=`fgrep 'right-context' $dir/configs/temp.info | awk '{print $2}'`
+ cat >$init_info < $dir/${lang_name}/init/info.txt
+ done
+fi
diff --git a/egs/babel_multilang/s5/local/make_corpus_subset.sh b/egs/babel_multilang/s5/local/make_corpus_subset.sh
index add194d48e8..acd5e91a18b 100755
--- a/egs/babel_multilang/s5/local/make_corpus_subset.sh
+++ b/egs/babel_multilang/s5/local/make_corpus_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh b/egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh
index 70d4ef22148..266f02f7ae1 100755
--- a/egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh
+++ b/egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/babel_multilang/s5/local/nnet3/prepare_multilingual_egs.sh b/egs/babel_multilang/s5/local/nnet3/prepare_multilingual_egs.sh
index acd88b1cee8..7d678c5f55c 100755
--- a/egs/babel_multilang/s5/local/nnet3/prepare_multilingual_egs.sh
+++ b/egs/babel_multilang/s5/local/nnet3/prepare_multilingual_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# This script generates separate egs directory for each input
# language in multilingual setup, which contains both egs.*.ark and egs.*.scp.
diff --git a/egs/babel_multilang/s5/local/nnet3/run_common_langs.sh b/egs/babel_multilang/s5/local/nnet3/run_common_langs.sh
index 63b7da82f60..073d01cb257 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_common_langs.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_common_langs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/babel_multilang/s5/local/nnet3/run_decode_lang.sh b/egs/babel_multilang/s5/local/nnet3/run_decode_lang.sh
index bd80fe9a701..5456d307d9f 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_decode_lang.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_decode_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh b/egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh
index 8fb01c19d00..2e48e057bd3 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script trains a multilingual model using 6 layer TDNN + Xent
# with 42 dim bottleneck layer in th fifth layer.
diff --git a/egs/babel_multilang/s5/local/nnet3/run_shared_ivector_extractor.sh b/egs/babel_multilang/s5/local/nnet3/run_shared_ivector_extractor.sh
index 7034743beca..28006a752c5 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_shared_ivector_extractor.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_shared_ivector_extractor.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/babel_multilang/s5/local/nnet3/run_tdnn_multilingual.sh b/egs/babel_multilang/s5/local/nnet3/run_tdnn_multilingual.sh
index 22ba636f06a..eb2cb77ba0d 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_tdnn_multilingual.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_tdnn_multilingual.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/bentham/v1/local/chain/compare_wer.sh b/egs/bentham/v1/local/chain/compare_wer.sh
index 2ce14e13694..a1b8fffe166 100755
--- a/egs/bentham/v1/local/chain/compare_wer.sh
+++ b/egs/bentham/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
index ec530ef1ce4..5343890db4e 100755
--- a/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ exp/chain/cnn_e2eali_1a
# System e2e_cnn_1a cnn_e2eali_1a
diff --git a/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index 716bdce3729..eda95d391c5 100755
--- a/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/bentham/v1/local/create_splits.sh b/egs/bentham/v1/local/create_splits.sh
index e8ea2279a49..a510959d472 100755
--- a/egs/bentham/v1/local/create_splits.sh
+++ b/egs/bentham/v1/local/create_splits.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Desh Raj (Johns Hopkins University)
# This script reads the extracted Bentham database files and creates
diff --git a/egs/bentham/v1/local/download_bentham_text.sh b/egs/bentham/v1/local/download_bentham_text.sh
index e09403718a1..5bcd1244b44 100755
--- a/egs/bentham/v1/local/download_bentham_text.sh
+++ b/egs/bentham/v1/local/download_bentham_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Desh Raj
# Apache 2.0
diff --git a/egs/bentham/v1/local/extract_features.sh b/egs/bentham/v1/local/extract_features.sh
index 460e467e99c..374309ba375 100755
--- a/egs/bentham/v1/local/extract_features.sh
+++ b/egs/bentham/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/bentham/v1/local/prepare_data.sh b/egs/bentham/v1/local/prepare_data.sh
index bbcc9863611..018f6843120 100755
--- a/egs/bentham/v1/local/prepare_data.sh
+++ b/egs/bentham/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Desh Raj (Johns Hopkins University)
diff --git a/egs/bentham/v1/local/score.sh b/egs/bentham/v1/local/score.sh
index 1d84815fc69..6168f38a929 100755
--- a/egs/bentham/v1/local/score.sh
+++ b/egs/bentham/v1/local/score.sh
@@ -1,5 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/bentham/v1/local/train_lm.sh b/egs/bentham/v1/local/train_lm.sh
index 48632a90769..b5434c38e0a 100755
--- a/egs/bentham/v1/local/train_lm.sh
+++ b/egs/bentham/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/bentham/v1/run_end2end.sh b/egs/bentham/v1/run_end2end.sh
index 63c034e41f6..1140fdb8e47 100755
--- a/egs/bentham/v1/run_end2end.sh
+++ b/egs/bentham/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Ashish Arora (Johns Hopkins University)
# 2018 Desh Raj (Johns Hopkins University)
diff --git a/egs/bn_music_speech/v1/local/make_bn.sh b/egs/bn_music_speech/v1/local/make_bn.sh
index 5e2a29f0cca..accecba7662 100755
--- a/egs/bn_music_speech/v1/local/make_bn.sh
+++ b/egs/bn_music_speech/v1/local/make_bn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
diff --git a/egs/bn_music_speech/v1/run.sh b/egs/bn_music_speech/v1/run.sh
index 08d5c022a9d..5cc82ddb320 100755
--- a/egs/bn_music_speech/v1/run.sh
+++ b/egs/bn_music_speech/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
diff --git a/egs/callhome_diarization/v1/diarization/VB_resegmentation.sh b/egs/callhome_diarization/v1/diarization/VB_resegmentation.sh
index 765c4eee8b8..ed3b2da0b3a 100755
--- a/egs/callhome_diarization/v1/diarization/VB_resegmentation.sh
+++ b/egs/callhome_diarization/v1/diarization/VB_resegmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Zili Huang
diff --git a/egs/callhome_diarization/v1/diarization/cluster.sh b/egs/callhome_diarization/v1/diarization/cluster.sh
index 5e5c6e9dbe5..6105129f907 100755
--- a/egs/callhome_diarization/v1/diarization/cluster.sh
+++ b/egs/callhome_diarization/v1/diarization/cluster.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 David Snyder
# 2017-2018 Matthew Maciejewski
diff --git a/egs/callhome_diarization/v1/diarization/extract_ivectors.sh b/egs/callhome_diarization/v1/diarization/extract_ivectors.sh
index d7bb389bad5..7fb2c6c510b 100755
--- a/egs/callhome_diarization/v1/diarization/extract_ivectors.sh
+++ b/egs/callhome_diarization/v1/diarization/extract_ivectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2016 David Snyder
diff --git a/egs/callhome_diarization/v1/diarization/make_rttm.py b/egs/callhome_diarization/v1/diarization/make_rttm.py
index cc1145ab9ab..fc32eafd530 100755
--- a/egs/callhome_diarization/v1/diarization/make_rttm.py
+++ b/egs/callhome_diarization/v1/diarization/make_rttm.py
@@ -34,9 +34,7 @@
import argparse
import sys
-
-sys.path.append('steps/libs')
-import common as common_lib
+import codecs
def get_args():
@@ -63,14 +61,14 @@ def main():
# File containing speaker labels per segment
seg2label = {}
- with common_lib.smart_open(args.labels) as labels_file:
+ with codecs.open(args.labels, 'r', 'utf-8') as labels_file:
for line in labels_file:
seg, label = line.strip().split()
seg2label[seg] = label
# Segments file
reco2segs = {}
- with common_lib.smart_open(args.segments) as segments_file:
+ with codecs.open(args.segments, 'r', 'utf-8') as segments_file:
for line in segments_file:
seg, reco, start, end = line.strip().split()
try:
@@ -117,7 +115,7 @@ def main():
new_segs += " " + start + "," + end + "," + label
merged_segs.append(reco + new_segs)
- with common_lib.smart_open(args.rttm_file, 'w') as rttm_writer:
+ with codecs.open(args.rttm_file, 'w', 'utf-8') as rttm_writer:
for reco_line in merged_segs:
segs = reco_line.strip().split()
reco = segs[0]
diff --git a/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh b/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh
index 8d579138c73..9091c52cd1a 100755
--- a/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh
+++ b/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Daniel Povey
# 2017-2018 David Snyder
diff --git a/egs/callhome_diarization/v1/diarization/nnet3/xvector/score_plda.sh b/egs/callhome_diarization/v1/diarization/nnet3/xvector/score_plda.sh
index 703bafd8912..5616b032be0 100755
--- a/egs/callhome_diarization/v1/diarization/nnet3/xvector/score_plda.sh
+++ b/egs/callhome_diarization/v1/diarization/nnet3/xvector/score_plda.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-2018 David Snyder
# 2017-2018 Matthew Maciejewski
# Apache 2.0.
diff --git a/egs/callhome_diarization/v1/diarization/score_plda.sh b/egs/callhome_diarization/v1/diarization/score_plda.sh
index a5be35e8f39..4ea0904aef2 100755
--- a/egs/callhome_diarization/v1/diarization/score_plda.sh
+++ b/egs/callhome_diarization/v1/diarization/score_plda.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-2018 David Snyder
# 2017-2018 Matthew Maciejewski
# Apache 2.0.
diff --git a/egs/callhome_diarization/v1/diarization/train_ivector_extractor_diag.sh b/egs/callhome_diarization/v1/diarization/train_ivector_extractor_diag.sh
index 9254012f3b0..39b571ddd41 100755
--- a/egs/callhome_diarization/v1/diarization/train_ivector_extractor_diag.sh
+++ b/egs/callhome_diarization/v1/diarization/train_ivector_extractor_diag.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/callhome_diarization/v1/diarization/vad_to_segments.sh b/egs/callhome_diarization/v1/diarization/vad_to_segments.sh
index d653e0313ea..7f0623c8cdb 100755
--- a/egs/callhome_diarization/v1/diarization/vad_to_segments.sh
+++ b/egs/callhome_diarization/v1/diarization/vad_to_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Matthew Maciejewski
# Apache 2.0
diff --git a/egs/callhome_diarization/v1/local/make_callhome.sh b/egs/callhome_diarization/v1/local/make_callhome.sh
index 21411fb6194..c2a014d7025 100755
--- a/egs/callhome_diarization/v1/local/make_callhome.sh
+++ b/egs/callhome_diarization/v1/local/make_callhome.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 David Snyder
# Apache 2.0.
#
diff --git a/egs/callhome_diarization/v1/local/make_sre.sh b/egs/callhome_diarization/v1/local/make_sre.sh
index bef4e06e68e..9dc68069a1f 100755
--- a/egs/callhome_diarization/v1/local/make_sre.sh
+++ b/egs/callhome_diarization/v1/local/make_sre.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
diff --git a/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats.sh b/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats.sh
index 62879623df4..b05dbd552f9 100755
--- a/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats.sh
+++ b/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
index dcdbe1b1593..326b6dbb9fa 100755
--- a/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
+++ b/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/callhome_diarization/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/callhome_diarization/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
index 4fdf0cfbad6..b08764259f9 100755
--- a/egs/callhome_diarization/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
+++ b/egs/callhome_diarization/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 David Snyder
# 2018 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2018 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/callhome_diarization/v1/run.sh b/egs/callhome_diarization/v1/run.sh
index f4652c0c0ef..5fa663f8e84 100755
--- a/egs/callhome_diarization/v1/run.sh
+++ b/egs/callhome_diarization/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 David Snyder
# 2017-2018 Matthew Maciejewski
# Apache 2.0.
diff --git a/egs/callhome_diarization/v2/run.sh b/egs/callhome_diarization/v2/run.sh
index 85a2c7fdf2b..331b2c56613 100755
--- a/egs/callhome_diarization/v2/run.sh
+++ b/egs/callhome_diarization/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 David Snyder
# 2017-2018 Matthew Maciejewski
#
diff --git a/egs/callhome_egyptian/s5/local/callhome_create_test_lang.sh b/egs/callhome_egyptian/s5/local/callhome_create_test_lang.sh
index f4a5cf6d1e2..ab3f5ec0ad8 100755
--- a/egs/callhome_egyptian/s5/local/callhome_create_test_lang.sh
+++ b/egs/callhome_egyptian/s5/local/callhome_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/callhome_egyptian/s5/local/callhome_data_prep.sh b/egs/callhome_egyptian/s5/local/callhome_data_prep.sh
index 8afe6049b8b..08b6866e58f 100755
--- a/egs/callhome_egyptian/s5/local/callhome_data_prep.sh
+++ b/egs/callhome_egyptian/s5/local/callhome_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Johns Hopkins University : (Gaurav Kumar)
# The input is the Callhome Egyptian Arabic Dataset which contains *.sph files
diff --git a/egs/callhome_egyptian/s5/local/callhome_train_lms.sh b/egs/callhome_egyptian/s5/local/callhome_train_lms.sh
index ec92b43e2f8..e960e2c420d 100755
--- a/egs/callhome_egyptian/s5/local/callhome_train_lms.sh
+++ b/egs/callhome_egyptian/s5/local/callhome_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one level above this directory
# Generate the text for the LM training
diff --git a/egs/callhome_egyptian/s5/local/nnet3/run_ivector_common.sh b/egs/callhome_egyptian/s5/local/nnet3/run_ivector_common.sh
index f062af8e89d..5a810c72379 100755
--- a/egs/callhome_egyptian/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/callhome_egyptian/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Inherited from the WSJ nnet3 recipe, modified for use with ECA
diff --git a/egs/callhome_egyptian/s5/local/nnet3/run_tdnn.sh b/egs/callhome_egyptian/s5/local/nnet3/run_tdnn.sh
index bd3868da42d..7c8f0d8e6d9 100755
--- a/egs/callhome_egyptian/s5/local/nnet3/run_tdnn.sh
+++ b/egs/callhome_egyptian/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the standard "tdnn" system, built in nnet3; it's what we use to
# call multi-splice.
diff --git a/egs/callhome_egyptian/s5/local/score.sh b/egs/callhome_egyptian/s5/local/score.sh
index 1e493d44a98..be4322eb2db 100755
--- a/egs/callhome_egyptian/s5/local/score.sh
+++ b/egs/callhome_egyptian/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/callhome_egyptian/s5/run.sh b/egs/callhome_egyptian/s5/run.sh
index ebe550c9814..d0069c4df2c 100755
--- a/egs/callhome_egyptian/s5/run.sh
+++ b/egs/callhome_egyptian/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Johns Hopkins University (Author : Gaurav Kumar, Daniel Povey)
# Recipe for CallHome Egyptian Arabic
diff --git a/egs/casia_hwdb/v1/local/augment_data.sh b/egs/casia_hwdb/v1/local/augment_data.sh
index 1f13ed15ded..d498c54665c 100755
--- a/egs/casia_hwdb/v1/local/augment_data.sh
+++ b/egs/casia_hwdb/v1/local/augment_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/casia_hwdb/v1/local/chain/compare_wer.sh b/egs/casia_hwdb/v1/local/chain/compare_wer.sh
index ab880c1adb5..eeb831e8e6b 100755
--- a/egs/casia_hwdb/v1/local/chain/compare_wer.sh
+++ b/egs/casia_hwdb/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/casia_hwdb/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/casia_hwdb/v1/local/chain/run_cnn_e2eali_1b.sh
index 300c8ae8e31..0ed75baa41b 100755
--- a/egs/casia_hwdb/v1/local/chain/run_cnn_e2eali_1b.sh
+++ b/egs/casia_hwdb/v1/local/chain/run_cnn_e2eali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the
# lattice alignments and to build a tree
diff --git a/egs/casia_hwdb/v1/local/chain/run_flatstart_cnn1a.sh b/egs/casia_hwdb/v1/local/chain/run_flatstart_cnn1a.sh
index 023fbff1c14..55be6acbadc 100755
--- a/egs/casia_hwdb/v1/local/chain/run_flatstart_cnn1a.sh
+++ b/egs/casia_hwdb/v1/local/chain/run_flatstart_cnn1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/casia_hwdb/v1/local/extract_database.sh b/egs/casia_hwdb/v1/local/extract_database.sh
index 1af3713d586..62c8151f9c5 100755
--- a/egs/casia_hwdb/v1/local/extract_database.sh
+++ b/egs/casia_hwdb/v1/local/extract_database.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Chun-Chieh Chang
# The original format of the dataset given is GEDI and page images.
diff --git a/egs/casia_hwdb/v1/local/extract_features.sh b/egs/casia_hwdb/v1/local/extract_features.sh
index f75837ae5b3..c9a36991e94 100755
--- a/egs/casia_hwdb/v1/local/extract_features.sh
+++ b/egs/casia_hwdb/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/casia_hwdb/v1/local/score.sh b/egs/casia_hwdb/v1/local/score.sh
index f2405205f02..6e98902f5bd 100755
--- a/egs/casia_hwdb/v1/local/score.sh
+++ b/egs/casia_hwdb/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh --max-lmwt 10 "$@"
diff --git a/egs/casia_hwdb/v1/local/train_lm.sh b/egs/casia_hwdb/v1/local/train_lm.sh
index bc738f217da..9e651d63aff 100755
--- a/egs/casia_hwdb/v1/local/train_lm.sh
+++ b/egs/casia_hwdb/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/casia_hwdb/v1/local/train_lm_lr.sh b/egs/casia_hwdb/v1/local/train_lm_lr.sh
index a8b1bfb76a4..70efc7fd8dd 100755
--- a/egs/casia_hwdb/v1/local/train_lm_lr.sh
+++ b/egs/casia_hwdb/v1/local/train_lm_lr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/casia_hwdb/v1/run.sh b/egs/casia_hwdb/v1/run.sh
index 44d1f26117c..987ca5d5078 100755
--- a/egs/casia_hwdb/v1/run.sh
+++ b/egs/casia_hwdb/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
stage=0
diff --git a/egs/chime1/s5/local/chime1_prepare_data.sh b/egs/chime1/s5/local/chime1_prepare_data.sh
index c5963b5d4ab..153890d7554 100755
--- a/egs/chime1/s5/local/chime1_prepare_data.sh
+++ b/egs/chime1/s5/local/chime1_prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
diff --git a/egs/chime1/s5/local/chime1_prepare_dict.sh b/egs/chime1/s5/local/chime1_prepare_dict.sh
index a5dc4cbd50d..4ccff9274c4 100755
--- a/egs/chime1/s5/local/chime1_prepare_dict.sh
+++ b/egs/chime1/s5/local/chime1_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
diff --git a/egs/chime1/s5/local/chime1_prepare_grammar.sh b/egs/chime1/s5/local/chime1_prepare_grammar.sh
index e06f736245f..689704aa24f 100755
--- a/egs/chime1/s5/local/chime1_prepare_grammar.sh
+++ b/egs/chime1/s5/local/chime1_prepare_grammar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
diff --git a/egs/chime1/s5/local/score.sh b/egs/chime1/s5/local/score.sh
index 778a4283461..0c2f14de7ad 100755
--- a/egs/chime1/s5/local/score.sh
+++ b/egs/chime1/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime1/s5/run.sh b/egs/chime1/s5/run.sh
index 617c1a99f5c..8087f326bc1 100755
--- a/egs/chime1/s5/run.sh
+++ b/egs/chime1/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
diff --git a/egs/chime2/s5/local/chime_format_data.sh b/egs/chime2/s5/local/chime_format_data.sh
index 5870174aff4..fd9276ae8bf 100755
--- a/egs/chime2/s5/local/chime_format_data.sh
+++ b/egs/chime2/s5/local/chime_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime2/s5/local/clean_wsj0_data_prep.sh b/egs/chime2/s5/local/clean_wsj0_data_prep.sh
index 7cc39e4a847..ae5750f3572 100755
--- a/egs/chime2/s5/local/clean_wsj0_data_prep.sh
+++ b/egs/chime2/s5/local/clean_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime2/s5/local/noisy_wsj0_data_prep.sh b/egs/chime2/s5/local/noisy_wsj0_data_prep.sh
index 8744f25d67e..c98804a3468 100755
--- a/egs/chime2/s5/local/noisy_wsj0_data_prep.sh
+++ b/egs/chime2/s5/local/noisy_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime2/s5/local/reverb_wsj0_data_prep.sh b/egs/chime2/s5/local/reverb_wsj0_data_prep.sh
index c6903f21c13..47d2e03b867 100755
--- a/egs/chime2/s5/local/reverb_wsj0_data_prep.sh
+++ b/egs/chime2/s5/local/reverb_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime2/s5/local/score.sh b/egs/chime2/s5/local/score.sh
index 93d8a11613c..6e2af231b07 100755
--- a/egs/chime2/s5/local/score.sh
+++ b/egs/chime2/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime2/s5/local/wsj_prepare_dict.sh b/egs/chime2/s5/local/wsj_prepare_dict.sh
index 1fa59e69875..ff71fd0c4a7 100755
--- a/egs/chime2/s5/local/wsj_prepare_dict.sh
+++ b/egs/chime2/s5/local/wsj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime2/s5/run.sh b/egs/chime2/s5/run.sh
index 138ce941ce7..894e961f8aa 100755
--- a/egs/chime2/s5/run.sh
+++ b/egs/chime2/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
diff --git a/egs/chime3/s5/local/bth_chime3_data_prep.sh b/egs/chime3/s5/local/bth_chime3_data_prep.sh
index 6fefc798487..8cc5cea86cf 100755
--- a/egs/chime3/s5/local/bth_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/bth_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/chime3_beamform.sh b/egs/chime3/s5/local/chime3_beamform.sh
index 170a37ccd84..39055196f41 100755
--- a/egs/chime3/s5/local/chime3_beamform.sh
+++ b/egs/chime3/s5/local/chime3_beamform.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime3/s5/local/chime3_calc_wers.sh b/egs/chime3/s5/local/chime3_calc_wers.sh
index 4770e5cf38c..58fba170c06 100755
--- a/egs/chime3/s5/local/chime3_calc_wers.sh
+++ b/egs/chime3/s5/local/chime3_calc_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime3/s5/local/chime3_calc_wers_smbr.sh b/egs/chime3/s5/local/chime3_calc_wers_smbr.sh
index ba5fc03d1da..66ac84690cc 100755
--- a/egs/chime3/s5/local/chime3_calc_wers_smbr.sh
+++ b/egs/chime3/s5/local/chime3_calc_wers_smbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime3/s5/local/chime3_train_lms.sh b/egs/chime3/s5/local/chime3_train_lms.sh
index 984ef766b2a..9ff1fad7e35 100755
--- a/egs/chime3/s5/local/chime3_train_lms.sh
+++ b/egs/chime3/s5/local/chime3_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Modified from the script for CHiME3 baseline
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)
diff --git a/egs/chime3/s5/local/chime3_train_rnnlms.sh b/egs/chime3/s5/local/chime3_train_rnnlms.sh
index 429ca828aa3..37a75d38b38 100755
--- a/egs/chime3/s5/local/chime3_train_rnnlms.sh
+++ b/egs/chime3/s5/local/chime3_train_rnnlms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)
diff --git a/egs/chime3/s5/local/clean_chime3_format_data.sh b/egs/chime3/s5/local/clean_chime3_format_data.sh
index f2d81bc5324..51201c7ec65 100755
--- a/egs/chime3/s5/local/clean_chime3_format_data.sh
+++ b/egs/chime3/s5/local/clean_chime3_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime3/s5/local/clean_wsj0_data_prep.sh b/egs/chime3/s5/local/clean_wsj0_data_prep.sh
index fe96881cf8d..671379ae732 100755
--- a/egs/chime3/s5/local/clean_wsj0_data_prep.sh
+++ b/egs/chime3/s5/local/clean_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/real_close_chime3_data_prep.sh b/egs/chime3/s5/local/real_close_chime3_data_prep.sh
index 4ef1fc4dffc..a420d094805 100755
--- a/egs/chime3/s5/local/real_close_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/real_close_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/real_enhan_chime3_data_prep.sh b/egs/chime3/s5/local/real_enhan_chime3_data_prep.sh
index 4230a1adbed..5c53f4c4f95 100755
--- a/egs/chime3/s5/local/real_enhan_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/real_enhan_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/real_noisy_chime3_data_prep.sh b/egs/chime3/s5/local/real_noisy_chime3_data_prep.sh
index 94a2d0226db..96428e143a9 100755
--- a/egs/chime3/s5/local/real_noisy_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/real_noisy_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/run_dnn.sh b/egs/chime3/s5/local/run_dnn.sh
index 78dc4283ee3..6962896e7b4 100755
--- a/egs/chime3/s5/local/run_dnn.sh
+++ b/egs/chime3/s5/local/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime3/s5/local/run_gmm.sh b/egs/chime3/s5/local/run_gmm.sh
index 5b9fbaa1736..f2afbc5be65 100755
--- a/egs/chime3/s5/local/run_gmm.sh
+++ b/egs/chime3/s5/local/run_gmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime3/s5/local/run_init.sh b/egs/chime3/s5/local/run_init.sh
index 9db289a12a5..2350599411f 100755
--- a/egs/chime3/s5/local/run_init.sh
+++ b/egs/chime3/s5/local/run_init.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime3/s5/local/run_lmrescore.sh b/egs/chime3/s5/local/run_lmrescore.sh
index 0c364367c98..20d22890f36 100755
--- a/egs/chime3/s5/local/run_lmrescore.sh
+++ b/egs/chime3/s5/local/run_lmrescore.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime3/s5/local/score.sh b/egs/chime3/s5/local/score.sh
index 93d8a11613c..6e2af231b07 100755
--- a/egs/chime3/s5/local/score.sh
+++ b/egs/chime3/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime3/s5/local/simu_enhan_chime3_data_prep.sh b/egs/chime3/s5/local/simu_enhan_chime3_data_prep.sh
index 8f47bcff095..827dfa5cba0 100755
--- a/egs/chime3/s5/local/simu_enhan_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/simu_enhan_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/simu_noisy_chime3_data_prep.sh b/egs/chime3/s5/local/simu_noisy_chime3_data_prep.sh
index 68c3ba2f0c7..16fe2ee6271 100755
--- a/egs/chime3/s5/local/simu_noisy_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/simu_noisy_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/wsj_prepare_dict.sh b/egs/chime3/s5/local/wsj_prepare_dict.sh
index 6ddebd60293..7c5b4f98506 100755
--- a/egs/chime3/s5/local/wsj_prepare_dict.sh
+++ b/egs/chime3/s5/local/wsj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation
# 2012-2014 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/run.sh b/egs/chime3/s5/run.sh
index 3ef5e630c13..d10e83c0cac 100755
--- a/egs/chime3/s5/run.sh
+++ b/egs/chime3/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Kaldi ASR baseline for the 3rd CHiME Challenge
#
diff --git a/egs/chime4/s5_1ch/local/chain/compare_wer.sh b/egs/chime4/s5_1ch/local/chain/compare_wer.sh
index edfefad547f..f59cac3bf5b 100755
--- a/egs/chime4/s5_1ch/local/chain/compare_wer.sh
+++ b/egs/chime4/s5_1ch/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh
index 3f8b7c60090..593ffe290ae 100755
--- a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This was modified from wsj/local/chain/tunning/run_tdnn_1e.sh to be
# used in Chime4.
diff --git a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh
index 8b4e93cd05b..8a7cb1813a2 100755
--- a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is a TDNN+LSTM chain system.
diff --git a/egs/chime4/s5_1ch/local/chime4_calc_wers.sh b/egs/chime4/s5_1ch/local/chime4_calc_wers.sh
index a4c115c1093..a0eacac54bf 100755
--- a/egs/chime4/s5_1ch/local/chime4_calc_wers.sh
+++ b/egs/chime4/s5_1ch/local/chime4_calc_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh b/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh
index 84bb2cb8dbd..a78e7d4583b 100755
--- a/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh
+++ b/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime4/s5_1ch/local/chime4_calc_wers_smbr.sh b/egs/chime4/s5_1ch/local/chime4_calc_wers_smbr.sh
index b316208b168..a56dc66dfd1 100755
--- a/egs/chime4/s5_1ch/local/chime4_calc_wers_smbr.sh
+++ b/egs/chime4/s5_1ch/local/chime4_calc_wers_smbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime4/s5_1ch/local/chime4_train_lms.sh b/egs/chime4/s5_1ch/local/chime4_train_lms.sh
index 06dd716e789..f3002741bde 100755
--- a/egs/chime4/s5_1ch/local/chime4_train_lms.sh
+++ b/egs/chime4/s5_1ch/local/chime4_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Modified from the script for CHiME3 baseline
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)
diff --git a/egs/chime4/s5_1ch/local/chime4_train_rnnlms.sh b/egs/chime4/s5_1ch/local/chime4_train_rnnlms.sh
index 8324c8e06b1..fa539584067 100755
--- a/egs/chime4/s5_1ch/local/chime4_train_rnnlms.sh
+++ b/egs/chime4/s5_1ch/local/chime4_train_rnnlms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)
diff --git a/egs/chime4/s5_1ch/local/clean_chime4_format_data.sh b/egs/chime4/s5_1ch/local/clean_chime4_format_data.sh
index 23dc8a70d9e..42f3ee70209 100755
--- a/egs/chime4/s5_1ch/local/clean_chime4_format_data.sh
+++ b/egs/chime4/s5_1ch/local/clean_chime4_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# 2015 Guoguo Chen
diff --git a/egs/chime4/s5_1ch/local/clean_wsj0_data_prep.sh b/egs/chime4/s5_1ch/local/clean_wsj0_data_prep.sh
index 8c6989bc0b2..9f831546e5c 100755
--- a/egs/chime4/s5_1ch/local/clean_wsj0_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/clean_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/compute_pesq.sh b/egs/chime4/s5_1ch/local/compute_pesq.sh
index 1d290a4893f..7121a02dca8 100755
--- a/egs/chime4/s5_1ch/local/compute_pesq.sh
+++ b/egs/chime4/s5_1ch/local/compute_pesq.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/compute_stoi_estoi_sdr.sh b/egs/chime4/s5_1ch/local/compute_stoi_estoi_sdr.sh
index b7627560b67..bb2accc3a81 100755
--- a/egs/chime4/s5_1ch/local/compute_stoi_estoi_sdr.sh
+++ b/egs/chime4/s5_1ch/local/compute_stoi_estoi_sdr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/download_se_eval_tool.sh b/egs/chime4/s5_1ch/local/download_se_eval_tool.sh
index ddd86a03d8a..a926ba6fda9 100755
--- a/egs/chime4/s5_1ch/local/download_se_eval_tool.sh
+++ b/egs/chime4/s5_1ch/local/download_se_eval_tool.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/nnet3/compare_wer.sh b/egs/chime4/s5_1ch/local/nnet3/compare_wer.sh
index 7a2fbd8a123..605c870a264 100755
--- a/egs/chime4/s5_1ch/local/nnet3/compare_wer.sh
+++ b/egs/chime4/s5_1ch/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/nnet3/compare_wer.sh exp/nnet3/tdnn_{c,d}_sp
diff --git a/egs/chime4/s5_1ch/local/nnet3/run_ivector_common.sh b/egs/chime4/s5_1ch/local/nnet3/run_ivector_common.sh
index 1009958dc0f..e3584f5f06e 100755
--- a/egs/chime4/s5_1ch/local/nnet3/run_ivector_common.sh
+++ b/egs/chime4/s5_1ch/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/chime4/s5_1ch/local/real_enhan_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/real_enhan_chime4_data_prep.sh
index 7d4f9c892a8..d3892ac2197 100755
--- a/egs/chime4/s5_1ch/local/real_enhan_chime4_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/real_enhan_chime4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh
index 0173b022176..f319d30c314 100755
--- a/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/rnnlm/run_lstm_back.sh b/egs/chime4/s5_1ch/local/rnnlm/run_lstm_back.sh
index 76e2b563e6b..be46d1934d4 100755
--- a/egs/chime4/s5_1ch/local/rnnlm/run_lstm_back.sh
+++ b/egs/chime4/s5_1ch/local/rnnlm/run_lstm_back.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
# 2015 Guoguo Chen
diff --git a/egs/chime4/s5_1ch/local/rnnlm/tuning/run_lstm_1a.sh b/egs/chime4/s5_1ch/local/rnnlm/tuning/run_lstm_1a.sh
index 8825364e6fa..dfec3e4915e 100755
--- a/egs/chime4/s5_1ch/local/rnnlm/tuning/run_lstm_1a.sh
+++ b/egs/chime4/s5_1ch/local/rnnlm/tuning/run_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
# 2015 Guoguo Chen
@@ -66,6 +66,7 @@ if [ $stage -le 0 ]; then
mkdir -p $text_dir
cp $srcdir/train.rnn $text_dir/chime4.txt.tmp
sed -e "s///g" $text_dir/chime4.txt.tmp > $text_dir/chime4.txt
+ rm $text_dir/chime4.txt.tmp
cp $srcdir/valid.rnn $text_dir/dev.txt
fi
diff --git a/egs/chime4/s5_1ch/local/run_beamform_2ch_track.sh b/egs/chime4/s5_1ch/local/run_beamform_2ch_track.sh
index c9ce5a72040..6282b1c8dd5 100755
--- a/egs/chime4/s5_1ch/local/run_beamform_2ch_track.sh
+++ b/egs/chime4/s5_1ch/local/run_beamform_2ch_track.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime4/s5_1ch/local/run_beamform_6ch_track.sh b/egs/chime4/s5_1ch/local/run_beamform_6ch_track.sh
index 6bdbc61adc2..2145cb3480b 100755
--- a/egs/chime4/s5_1ch/local/run_beamform_6ch_track.sh
+++ b/egs/chime4/s5_1ch/local/run_beamform_6ch_track.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime4/s5_1ch/local/run_blstm_gev.sh b/egs/chime4/s5_1ch/local/run_blstm_gev.sh
index 2ee92b70fbd..d581a9d1e09 100755
--- a/egs/chime4/s5_1ch/local/run_blstm_gev.sh
+++ b/egs/chime4/s5_1ch/local/run_blstm_gev.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/run_gmm.sh b/egs/chime4/s5_1ch/local/run_gmm.sh
index 5178433dfc2..d148761ce40 100755
--- a/egs/chime4/s5_1ch/local/run_gmm.sh
+++ b/egs/chime4/s5_1ch/local/run_gmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_init.sh b/egs/chime4/s5_1ch/local/run_init.sh
index f8c4782cc48..36ae519dfe4 100755
--- a/egs/chime4/s5_1ch/local/run_init.sh
+++ b/egs/chime4/s5_1ch/local/run_init.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_lmrescore.sh b/egs/chime4/s5_1ch/local/run_lmrescore.sh
index 58a19c6da25..0c5bef0b757 100755
--- a/egs/chime4/s5_1ch/local/run_lmrescore.sh
+++ b/egs/chime4/s5_1ch/local/run_lmrescore.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn.sh b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn.sh
index 58af793615e..5d2555fb0dd 100755
--- a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn.sh
+++ b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh
index 0bea4dd7102..1b7c28654d5 100755
--- a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh
+++ b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_nn-gev.sh b/egs/chime4/s5_1ch/local/run_nn-gev.sh
index a17dd3d3f15..2f9222fefc2 100755
--- a/egs/chime4/s5_1ch/local/run_nn-gev.sh
+++ b/egs/chime4/s5_1ch/local/run_nn-gev.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/score.sh b/egs/chime4/s5_1ch/local/score.sh
index 93d8a11613c..6e2af231b07 100755
--- a/egs/chime4/s5_1ch/local/score.sh
+++ b/egs/chime4/s5_1ch/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/simu_enhan_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/simu_enhan_chime4_data_prep.sh
index d6419fa90b9..10873130853 100755
--- a/egs/chime4/s5_1ch/local/simu_enhan_chime4_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/simu_enhan_chime4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh
index 124cde82b8a..0fed4cde63b 100755
--- a/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/write_se_results.sh b/egs/chime4/s5_1ch/local/write_se_results.sh
index 7ada63f8ccc..8a844467d5a 100755
--- a/egs/chime4/s5_1ch/local/write_se_results.sh
+++ b/egs/chime4/s5_1ch/local/write_se_results.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/wsj_prepare_dict.sh b/egs/chime4/s5_1ch/local/wsj_prepare_dict.sh
index 6ddebd60293..7c5b4f98506 100755
--- a/egs/chime4/s5_1ch/local/wsj_prepare_dict.sh
+++ b/egs/chime4/s5_1ch/local/wsj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation
# 2012-2014 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/run.sh b/egs/chime4/s5_1ch/run.sh
index 5b980dec827..4265ed92a9e 100755
--- a/egs/chime4/s5_1ch/run.sh
+++ b/egs/chime4/s5_1ch/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Kaldi ASR baseline for the CHiME-4 Challenge (1ch track: single channel track)
#
diff --git a/egs/chime4/s5_2ch/run.sh b/egs/chime4/s5_2ch/run.sh
index 7ae5048c6fa..702a5b2fc58 100755
--- a/egs/chime4/s5_2ch/run.sh
+++ b/egs/chime4/s5_2ch/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Kaldi ASR baseline for the CHiME-4 Challenge (2ch track: 2 channel track)
#
diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh
index f0f469e46c8..5e8df0a64ae 100755
--- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1b.sh
index 920f2543132..d0ee46e8288 100755
--- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This factorized TDNN (TDNN-F) script is ported from s5b recipe
# It uses resnet-style skip connections.
diff --git a/egs/chime5/s5/local/nnet3/compare_wer.sh b/egs/chime5/s5/local/nnet3/compare_wer.sh
index 095e85cc338..4888de1f159 100755
--- a/egs/chime5/s5/local/nnet3/compare_wer.sh
+++ b/egs/chime5/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime5/s5/local/nnet3/run_ivector_common.sh b/egs/chime5/s5/local/nnet3/run_ivector_common.sh
index 2b672063be7..2da57372a45 100755
--- a/egs/chime5/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/chime5/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/chime5/s5/local/prepare_data.sh b/egs/chime5/s5/local/prepare_data.sh
index 98087322c38..ac07d02270f 100755
--- a/egs/chime5/s5/local/prepare_data.sh
+++ b/egs/chime5/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe, Yenda Trmal)
# Apache 2.0
diff --git a/egs/chime5/s5/local/prepare_dict.sh b/egs/chime5/s5/local/prepare_dict.sh
index 09083d0e795..1ea75af8a11 100755
--- a/egs/chime5/s5/local/prepare_dict.sh
+++ b/egs/chime5/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2018, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/chime5/s5/local/run_beamformit.sh b/egs/chime5/s5/local/run_beamformit.sh
index aa3badd90d8..4ac45eb98db 100755
--- a/egs/chime5/s5/local/run_beamformit.sh
+++ b/egs/chime5/s5/local/run_beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime5/s5/local/run_recog.sh b/egs/chime5/s5/local/run_recog.sh
index 5c74c9ff242..4e13ae8c0fb 100755
--- a/egs/chime5/s5/local/run_recog.sh
+++ b/egs/chime5/s5/local/run_recog.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime5/s5/local/run_wpe.sh b/egs/chime5/s5/local/run_wpe.sh
index 8ecbbd6182a..0e6ba2676ba 100755
--- a/egs/chime5/s5/local/run_wpe.sh
+++ b/egs/chime5/s5/local/run_wpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime5/s5/local/score_for_submit.sh b/egs/chime5/s5/local/score_for_submit.sh
index 23121d68b93..c08fc022840 100755
--- a/egs/chime5/s5/local/score_for_submit.sh
+++ b/egs/chime5/s5/local/score_for_submit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
#
diff --git a/egs/chime5/s5/local/train_lms_srilm.sh b/egs/chime5/s5/local/train_lms_srilm.sh
index 5a1d56d24b3..3b19e58bb48 100755
--- a/egs/chime5/s5/local/train_lms_srilm.sh
+++ b/egs/chime5/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal, Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh
index 024c0190b3e..08779bd1aa1 100755
--- a/egs/chime5/s5/run.sh
+++ b/egs/chime5/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh b/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
index 95e9d934bd3..f81bbd59258 100755
--- a/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
+++ b/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh
index daad37e2cd7..ca1de635168 100755
--- a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh
index e033715d884..d9c4b20e513 100755
--- a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This factorized TDNN (TDNN-F) script is adapted from SWBD recipe 7q.
# It uses resnet-style skip connections.
diff --git a/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
index e3d8e6ac4dc..2de79fdf593 100755
--- a/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime5/s5b/local/copy_lat_dir_parallel.sh b/egs/chime5/s5b/local/copy_lat_dir_parallel.sh
index 82839604c9e..3e2c1b445b8 100755
--- a/egs/chime5/s5b/local/copy_lat_dir_parallel.sh
+++ b/egs/chime5/s5b/local/copy_lat_dir_parallel.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
cmd=queue.pl
nj=40
diff --git a/egs/chime5/s5b/local/extract_vad_weights.sh b/egs/chime5/s5b/local/extract_vad_weights.sh
index 250b021bd8f..d5019f100b1 100755
--- a/egs/chime5/s5b/local/extract_vad_weights.sh
+++ b/egs/chime5/s5b/local/extract_vad_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti)
# 2019 Vimal Manohar
diff --git a/egs/chime5/s5b/local/nnet3/compare_wer.sh b/egs/chime5/s5b/local/nnet3/compare_wer.sh
index fa627acd27b..6e4965dd819 100644
--- a/egs/chime5/s5b/local/nnet3/compare_wer.sh
+++ b/egs/chime5/s5b/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime5/s5b/local/nnet3/decode.sh b/egs/chime5/s5b/local/nnet3/decode.sh
index 8fa54e0d4a6..9c108430b94 100755
--- a/egs/chime5/s5b/local/nnet3/decode.sh
+++ b/egs/chime5/s5b/local/nnet3/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti)
# 2019 Vimal Manohar
diff --git a/egs/chime5/s5b/local/nnet3/run_ivector_common.sh b/egs/chime5/s5b/local/nnet3/run_ivector_common.sh
index 3910e1812a3..ef7ce7c3534 100755
--- a/egs/chime5/s5b/local/nnet3/run_ivector_common.sh
+++ b/egs/chime5/s5b/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/chime5/s5b/local/prepare_data.sh b/egs/chime5/s5b/local/prepare_data.sh
index 98087322c38..ac07d02270f 100755
--- a/egs/chime5/s5b/local/prepare_data.sh
+++ b/egs/chime5/s5b/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe, Yenda Trmal)
# Apache 2.0
diff --git a/egs/chime5/s5b/local/prepare_dict.sh b/egs/chime5/s5b/local/prepare_dict.sh
index 09083d0e795..1ea75af8a11 100755
--- a/egs/chime5/s5b/local/prepare_dict.sh
+++ b/egs/chime5/s5b/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2018, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/chime5/s5b/local/reverberate_lat_dir.sh b/egs/chime5/s5b/local/reverberate_lat_dir.sh
index f601a37c0e1..4a56d910489 100755
--- a/egs/chime5/s5b/local/reverberate_lat_dir.sh
+++ b/egs/chime5/s5b/local/reverberate_lat_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Vimal Manohar
# Apache 2.0
diff --git a/egs/chime5/s5b/local/run_beamformit.sh b/egs/chime5/s5b/local/run_beamformit.sh
index aa3badd90d8..4ac45eb98db 100755
--- a/egs/chime5/s5b/local/run_beamformit.sh
+++ b/egs/chime5/s5b/local/run_beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime5/s5b/local/run_recog.sh b/egs/chime5/s5b/local/run_recog.sh
index 989a5f95d01..4da9b1bf2fb 100755
--- a/egs/chime5/s5b/local/run_recog.sh
+++ b/egs/chime5/s5b/local/run_recog.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime5/s5b/local/run_wpe.sh b/egs/chime5/s5b/local/run_wpe.sh
index ed512e69aae..4c6ff0c7e71 100755
--- a/egs/chime5/s5b/local/run_wpe.sh
+++ b/egs/chime5/s5b/local/run_wpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime5/s5b/local/score_for_submit.sh b/egs/chime5/s5b/local/score_for_submit.sh
index 23121d68b93..c08fc022840 100755
--- a/egs/chime5/s5b/local/score_for_submit.sh
+++ b/egs/chime5/s5b/local/score_for_submit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
#
diff --git a/egs/chime5/s5b/local/train_lms_srilm.sh b/egs/chime5/s5b/local/train_lms_srilm.sh
index 5a1d56d24b3..3b19e58bb48 100755
--- a/egs/chime5/s5b/local/train_lms_srilm.sh
+++ b/egs/chime5/s5b/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal, Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime5/s5b/run.sh b/egs/chime5/s5b/run.sh
index 37bc5c2c94e..0358fab5269 100755
--- a/egs/chime5/s5b/run.sh
+++ b/egs/chime5/s5b/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime6/s5_track1/local/add_location_to_uttid.sh b/egs/chime6/s5_track1/local/add_location_to_uttid.sh
index 91bd0c0dd37..edb88c3f295 100755
--- a/egs/chime6/s5_track1/local/add_location_to_uttid.sh
+++ b/egs/chime6/s5_track1/local/add_location_to_uttid.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Author: Ashish Arora
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/chain/compare_wer.sh b/egs/chime6/s5_track1/local/chain/compare_wer.sh
index cd6be14ed88..736a3177f17 100755
--- a/egs/chime6/s5_track1/local/chain/compare_wer.sh
+++ b/egs/chime6/s5_track1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh
index daad37e2cd7..ca1de635168 100755
--- a/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1b.sh b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1b.sh
index a9c797ffa33..031a3687262 100755
--- a/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This factorized TDNN (TDNN-F) script is adapted from SWBD recipe 7q.
# It uses resnet-style skip connections.
diff --git a/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh b/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh
index 82839604c9e..3e2c1b445b8 100755
--- a/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh
+++ b/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
cmd=queue.pl
nj=40
diff --git a/egs/chime6/s5_track1/local/decode.sh b/egs/chime6/s5_track1/local/decode.sh
index 7283a171000..cabf473535f 100755
--- a/egs/chime6/s5_track1/local/decode.sh
+++ b/egs/chime6/s5_track1/local/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime6/s5_track1/local/extract_vad_weights.sh b/egs/chime6/s5_track1/local/extract_vad_weights.sh
index 250b021bd8f..d5019f100b1 100755
--- a/egs/chime6/s5_track1/local/extract_vad_weights.sh
+++ b/egs/chime6/s5_track1/local/extract_vad_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti)
# 2019 Vimal Manohar
diff --git a/egs/chime6/s5_track1/local/generate_chime6_data.sh b/egs/chime6/s5_track1/local/generate_chime6_data.sh
index 93106cf605a..9ecdbe4208f 100755
--- a/egs/chime6/s5_track1/local/generate_chime6_data.sh
+++ b/egs/chime6/s5_track1/local/generate_chime6_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019, Johns Hopkins University (Author: Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/install_pb_chime5.sh b/egs/chime6/s5_track1/local/install_pb_chime5.sh
index a151dc60f12..7863cbed437 100755
--- a/egs/chime6/s5_track1/local/install_pb_chime5.sh
+++ b/egs/chime6/s5_track1/local/install_pb_chime5.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Installs pb_chime5
# miniconda should be installed in $HOME/miniconda3/
diff --git a/egs/chime6/s5_track1/local/nnet3/compare_wer.sh b/egs/chime6/s5_track1/local/nnet3/compare_wer.sh
index 095e85cc338..4888de1f159 100755
--- a/egs/chime6/s5_track1/local/nnet3/compare_wer.sh
+++ b/egs/chime6/s5_track1/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime6/s5_track1/local/nnet3/decode.sh b/egs/chime6/s5_track1/local/nnet3/decode.sh
index 8fa54e0d4a6..9c108430b94 100755
--- a/egs/chime6/s5_track1/local/nnet3/decode.sh
+++ b/egs/chime6/s5_track1/local/nnet3/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti)
# 2019 Vimal Manohar
diff --git a/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh b/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh
index cfa18cb7617..0afb983d2fc 100755
--- a/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh
+++ b/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/chime6/s5_track1/local/prepare_data.sh b/egs/chime6/s5_track1/local/prepare_data.sh
index 3d1ffe859a5..b7cc56c4be4 100755
--- a/egs/chime6/s5_track1/local/prepare_data.sh
+++ b/egs/chime6/s5_track1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe, Yenda Trmal)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/prepare_dict.sh b/egs/chime6/s5_track1/local/prepare_dict.sh
index 09083d0e795..1ea75af8a11 100755
--- a/egs/chime6/s5_track1/local/prepare_dict.sh
+++ b/egs/chime6/s5_track1/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2018, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/chime6/s5_track1/local/reverberate_lat_dir.sh b/egs/chime6/s5_track1/local/reverberate_lat_dir.sh
index f601a37c0e1..4a56d910489 100755
--- a/egs/chime6/s5_track1/local/reverberate_lat_dir.sh
+++ b/egs/chime6/s5_track1/local/reverberate_lat_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Vimal Manohar
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/run_beamformit.sh b/egs/chime6/s5_track1/local/run_beamformit.sh
index aa3badd90d8..4ac45eb98db 100755
--- a/egs/chime6/s5_track1/local/run_beamformit.sh
+++ b/egs/chime6/s5_track1/local/run_beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime6/s5_track1/local/run_gss.sh b/egs/chime6/s5_track1/local/run_gss.sh
index fbdc4af25d1..a9c01b20564 100755
--- a/egs/chime6/s5_track1/local/run_gss.sh
+++ b/egs/chime6/s5_track1/local/run_gss.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime6/s5_track1/local/run_wpe.sh b/egs/chime6/s5_track1/local/run_wpe.sh
index ed512e69aae..4c6ff0c7e71 100755
--- a/egs/chime6/s5_track1/local/run_wpe.sh
+++ b/egs/chime6/s5_track1/local/run_wpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/score_for_submit.sh b/egs/chime6/s5_track1/local/score_for_submit.sh
index ba7d6cde574..1d7564c6ee0 100755
--- a/egs/chime6/s5_track1/local/score_for_submit.sh
+++ b/egs/chime6/s5_track1/local/score_for_submit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Copyright 2019 Johns Hopkins University (Author: Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/train_lms_srilm.sh b/egs/chime6/s5_track1/local/train_lms_srilm.sh
index 5a1d56d24b3..3b19e58bb48 100755
--- a/egs/chime6/s5_track1/local/train_lms_srilm.sh
+++ b/egs/chime6/s5_track1/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal, Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/run.sh b/egs/chime6/s5_track1/run.sh
index 0890a939faf..cbcb3cd2102 100755
--- a/egs/chime6/s5_track1/run.sh
+++ b/egs/chime6/s5_track1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime6/s5_track2/RESULTS b/egs/chime6/s5_track2/RESULTS
index cf87e7cc109..131b43cecf8 100644
--- a/egs/chime6/s5_track2/RESULTS
+++ b/egs/chime6/s5_track2/RESULTS
@@ -1,18 +1,23 @@
# Results for Chime-6 track 2 for dev and eval, using pretrained models
# available at http://kaldi-asr.org/models/m12.
-# Speech Activity Detection (SAD)
- Missed speech False alarm Total error
-Dev 4.3 2.1 6.4
-Eval 5.6 5.9 11.5
+# These results are reported only for array U06, which is the default
+# array selection method in the baseline system.
-# The results for the remaining pipeline are only for array U06.
+# Speech Activity Detection (SAD)
+ Missed speech False alarm Total error
+Dev (old RTTM) 2.5 0.8 3.3
+Dev (new RTTM) 1.9 0.7 2.6
+Eval (old RTTM) 4.1 1.8 5.9
+Eval (new RTTM) 4.3 1.5 5.8
# Diarization
- DER JER
-Dev 57.15 83.96
-Eval 54.12 80.33
+ DER JER
+Dev (old RTTM) 61.56 69.75
+Dev (new RTTM) 63.42 70.83
+Eval (old RTTM) 61.96 71.40
+Eval (new RTTM) 68.20 72.54
# ASR nnet3 tdnn+chain
-Dev: U06 %WER 81.18 [ 58881 / 47798, 1638 ins, 30528 del, 15632 sub ]
-Eval: U06 %WER 85.39 [ 55132 / 47076, 1107 ins, 27768 del, 18201 sub ]
+Dev: %WER 84.25 [ 49610 / 58881, 1937 ins, 34685 del, 12988 sub ]
+Eval: %WER 77.94 [ 42971 / 55132, 1086 ins, 30839 del, 11046 sub ]
diff --git a/egs/chime6/s5_track2/local/decode.sh b/egs/chime6/s5_track2/local/decode.sh
index 876cc0be126..8f094f5c4df 100755
--- a/egs/chime6/s5_track2/local/decode.sh
+++ b/egs/chime6/s5_track2/local/decode.sh
@@ -1,22 +1,29 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# This script decodes raw utterances through the entire pipeline:
# Feature extraction -> SAD -> Diarization -> ASR
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal)
-# 2019 Desh Raj, David Snyder, Ashish Arora
+# 2019 Desh Raj, David Snyder, Ashish Arora, Zhaoheng Ni
# Apache 2.0
# Begin configuration section.
nj=8
-decode_nj=10
stage=0
sad_stage=0
+score_sad=true
diarizer_stage=0
decode_diarize_stage=0
score_stage=0
+
enhancement=beamformit
+# option to use the new RTTM reference for SAD and diarization
+use_new_rttm_reference=false
+if $use_new_rttm_reference == "true"; then
+ git clone https://github.com/nateanl/chime6_rttm
+fi
+
# chime5 main directory path
# please change the path accordingly
chime5_corpus=/export/corpora4/CHiME5
@@ -93,6 +100,7 @@ if [ $stage -le 1 ]; then
"$PWD/${enhandir}/${dset}_${enhancement}_u0*" \
${json_dir}/${dset} data/${dset}_${enhancement}_dereverb
done
+
fi
if [ $stage -le 2 ]; then
@@ -100,7 +108,7 @@ if [ $stage -le 2 ]; then
# want to store MFCC features.
mfccdir=mfcc
for x in ${test_sets}; do
- steps/make_mfcc.sh --nj $decode_nj --cmd "$train_cmd" \
+ steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" \
--mfcc-config conf/mfcc_hires.conf \
data/$x exp/make_mfcc/$x $mfccdir
done
@@ -121,18 +129,44 @@ if [ $stage -le 3 ]; then
exit 0
fi
# Perform segmentation
- local/segmentation/detect_speech_activity.sh --nj $decode_nj --stage $sad_stage \
+ local/segmentation/detect_speech_activity.sh --nj $nj --stage $sad_stage \
$test_set $sad_nnet_dir mfcc $sad_work_dir \
data/${datadir} || exit 1
- mv data/${datadir}_seg data/${datadir}_${nnet_type}_seg
- mv data/${datadir}/{segments.bak,utt2spk.bak} data/${datadir}_${nnet_type}_seg
+ test_dir=data/${datadir}_${nnet_type}_seg
+ mv data/${datadir}_seg ${test_dir}/
+ cp data/${datadir}/{segments.bak,utt2spk.bak} ${test_dir}/
# Generate RTTM file from segmentation performed by SAD. This can
# be used to evaluate the performance of the SAD as an intermediate
# step.
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
- data/${datadir}_${nnet_type}_seg/utt2spk data/${datadir}_${nnet_type}_seg/segments \
- data/${datadir}_${nnet_type}_seg/rttm
+ ${test_dir}/utt2spk ${test_dir}/segments ${test_dir}/rttm
+
+ if [ $score_sad == "true" ]; then
+ echo "Scoring $datadir.."
+ # We first generate the reference RTTM from the backed up utt2spk and segments
+ # files.
+ ref_rttm=${test_dir}/ref_rttm
+ steps/segmentation/convert_utt2spk_and_segments_to_rttm.py ${test_dir}/utt2spk.bak \
+ ${test_dir}/segments.bak ${test_dir}/ref_rttm
+
+ # To score, we select just U06 segments from the hypothesis RTTM.
+ hyp_rttm=${test_dir}/rttm.U06
+ grep 'U06' ${test_dir}/rttm > ${test_dir}/rttm.U06
+ echo "Array U06 selected for scoring.."
+
+ if $use_new_rttm_reference == "true"; then
+ echo "Use the new RTTM reference."
+ mode="$(cut -d'_' -f1 <<<"$datadir")"
+ ref_rttm=./chime6_rttm/${mode}_rttm
+ fi
+
+ sed 's/_U0[1-6].ENH//g' $ref_rttm > $ref_rttm.scoring
+ sed 's/_U0[1-6].ENH//g' $hyp_rttm > $hyp_rttm.scoring
+ cat ./local/uem_file | grep 'U06' | sed 's/_U0[1-6]//g' > ./local/uem_file.tmp
+ md-eval.pl -1 -c 0.25 -u ./local/uem_file.tmp -r $ref_rttm.scoring -s $hyp_rttm.scoring |\
+ awk 'or(/MISSED SPEECH/,/FALARM SPEECH/)'
+ fi
done
fi
@@ -141,7 +175,14 @@ fi
#######################################################################
if [ $stage -le 4 ]; then
for datadir in ${test_sets}; do
- local/diarize.sh --nj 10 --cmd "$train_cmd" --stage $diarizer_stage \
+ if $use_new_rttm_reference == "true"; then
+ mode="$(cut -d'_' -f1 <<<"$datadir")"
+ ref_rttm=./chime6_rttm/${mode}_rttm
+ else
+ ref_rttm=data/${datadir}_${nnet_type}_seg/ref_rttm
+ fi
+ local/diarize.sh --nj $nj --cmd "$train_cmd" --stage $diarizer_stage \
+ --ref-rttm $ref_rttm \
exp/xvector_nnet_1a \
data/${datadir}_${nnet_type}_seg \
exp/${datadir}_${nnet_type}_seg_diarization
@@ -156,7 +197,7 @@ if [ $stage -le 5 ]; then
local/decode_diarized.sh --nj $nj --cmd "$decode_cmd" --stage $decode_diarize_stage \
exp/${datadir}_${nnet_type}_seg_diarization data/$datadir data/lang \
exp/chain_${train_set}_cleaned_rvb exp/nnet3_${train_set}_cleaned_rvb \
- data/${datadir}_diarized
+ data/${datadir}_diarized || exit 1
done
fi
diff --git a/egs/chime6/s5_track2/local/decode_diarized.sh b/egs/chime6/s5_track2/local/decode_diarized.sh
index 2d0ad6a3b95..f687b313893 100755
--- a/egs/chime6/s5_track2/local/decode_diarized.sh
+++ b/egs/chime6/s5_track2/local/decode_diarized.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Ashish Arora, Vimal Manohar
# Apache 2.0.
# This script takes an rttm file, and performs decoding on on a test directory.
@@ -38,6 +38,9 @@ if [ $stage -le 0 ]; then
echo "$0 copying data files in output directory"
cp $rttm_dir/rttm $rttm_dir/rttm_1
sed -i 's/'.ENH'/''/g' $rttm_dir/rttm_1
+ # removing participant introduction from the hypothesis rttm
+ # UEM file contains the scoring durations for each recording
+ local/truncate_rttm.py $rttm_dir/rttm_1 local/uem_file $rttm_dir/rttm_introduction_removed
mkdir -p ${out_dir}_hires
cp ${data_in}/{wav.scp,utt2spk} ${out_dir}_hires
utils/data/get_reco2dur.sh ${out_dir}_hires
@@ -45,8 +48,8 @@ fi
if [ $stage -le 1 ]; then
echo "$0 creating segments file from rttm and utt2spk, reco2file_and_channel "
- local/convert_rttm_to_utt2spk_and_segments.py --append-reco-id-to-spkr=true $rttm_dir/rttm_1 \
- <(awk '{print $2".ENH "$2" "$3}' $rttm_dir/rttm_1 |sort -u) \
+ local/convert_rttm_to_utt2spk_and_segments.py --append-reco-id-to-spkr=true $rttm_dir/rttm_introduction_removed \
+ <(awk '{print $2".ENH "$2" "$3}' $rttm_dir/rttm_introduction_removed |sort -u) \
${out_dir}_hires/utt2spk ${out_dir}_hires/segments
utils/utt2spk_to_spk2utt.pl ${out_dir}_hires/utt2spk > ${out_dir}_hires/spk2utt
diff --git a/egs/chime6/s5_track2/local/diarize.sh b/egs/chime6/s5_track2/local/diarize.sh
index 561d5fe7755..d555e92c0e8 100755
--- a/egs/chime6/s5_track2/local/diarize.sh
+++ b/egs/chime6/s5_track2/local/diarize.sh
@@ -1,5 +1,7 @@
#!/bin/bash
-# Copyright 2019 David Snder
+# Copyright 2019 David Snyder
+# 2020 Desh Raj
+
# Apache 2.0.
#
# This script takes an input directory that has a segments file (and
@@ -20,7 +22,7 @@ if [ $# != 3 ]; then
echo "Options: "
echo " --nj # number of parallel jobs."
echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
- echo " --ref-rttm # if present, used to score output RTTM."
+ echo " --ref_rttm ./local/dev_rttm # the location of the reference RTTM file"
exit 1;
fi
@@ -85,29 +87,33 @@ if [ $stage -le 4 ]; then
echo "$0: wrote RTTM to output directory ${out_dir}"
fi
+hyp_rttm=${out_dir}/rttm
+
# For scoring the diarization system, we use the same tool that was
# used in the DIHARD II challenge. This is available at:
# https://github.com/nryant/dscore
+# Note that the scoring takes a single reference RTTM and a single
+# hypothesis RTTM.
if [ $stage -le 5 ]; then
# If a reference RTTM file is not provided, we create one using the backed up
# segments and utt2spk files in the original data directory.
- if [ -z $ref_rttm ]; then
- ref_rttm=data/$name/rttm
- echo "$0: preparing ref RTTM file from segments and utt2spk"
+ if [ -z "$ref_rttm" ]; then
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py data/$name/utt2spk.bak \
- data/$name/segments.bak $ref_rttm
+ data/$name/segments.bak data/$name/rttm
+ ref_rttm=data/$name/rttm
fi
- grep 'U06' $ref_rttm > ${ref_rttm}.U06
- ref_rttm_path=$(readlink -f ${ref_rttm}.U06)
- out_rttm_path=$(readlink -f $out_dir/rttm)
+ echo "Diarization results for "${name}
if ! [ -d dscore ]; then
git clone https://github.com/nryant/dscore.git || exit 1;
cd dscore
python -m pip install --user -r requirements.txt
cd ..
fi
- cd dscore
- python score.py -r $ref_rttm_path -s $out_rttm_path
- cd ..
+ sed 's/_U0[1-6]\.ENH//g' $ref_rttm > $ref_rttm.scoring
+ sed 's/_U0[1-6]\.ENH//g' $hyp_rttm > $hyp_rttm.scoring
+ ref_rttm_path=$(readlink -f ${ref_rttm}.scoring)
+ hyp_rttm_path=$(readlink -f ${hyp_rttm}.scoring)
+ cat ./local/uem_file | grep 'U06' | sed 's/_U0[1-6]//g' > ./local/uem_file.scoring
+ cd dscore && python score.py -u ../local/uem_file.scoring -r $ref_rttm_path \
+ -s $hyp_rttm_path && cd .. || exit 1;
fi
-
diff --git a/egs/chime6/s5_track2/local/get_hyp_perspeaker_perarray_file.py b/egs/chime6/s5_track2/local/get_hyp_perspeaker_perarray_file.py
index 7b3e14aaa49..091cf7c05b1 100755
--- a/egs/chime6/s5_track2/local/get_hyp_perspeaker_perarray_file.py
+++ b/egs/chime6/s5_track2/local/get_hyp_perspeaker_perarray_file.py
@@ -39,11 +39,18 @@ def main():
combined_hyp_file = args.output_dir_path + '/' + 'hyp' + '_' + sessionid_micid_speakerid + '_comb'
combined_hyp_writer = open(combined_hyp_file, 'w')
utterances = sessionid_micid_speakerid_dict[sessionid_micid_speakerid]
- text = ''
+ # sorting utterances by start and end time
+ sessionid_micid_speakerid_utterances={}
for line in utterances:
parts = line.strip().split()
+ utt_parts = parts[0].strip().split('-')
+ time ='-'.join(utt_parts[2:])
+ sessionid_micid_speakerid_utterances[time] = line
+ text = ''
+ for time_key in sorted(sessionid_micid_speakerid_utterances):
+ parts = sessionid_micid_speakerid_utterances[time_key].strip().split()
text = text + ' ' + ' '.join(parts[1:])
- hyp_writer.write(line)
+ hyp_writer.write(sessionid_micid_speakerid_utterances[time_key])
combined_utterance = 'utt' + " " + text
combined_hyp_writer.write(combined_utterance)
combined_hyp_writer.write('\n')
diff --git a/egs/chime6/s5_track2/local/get_ref_perspeaker_persession_file.py b/egs/chime6/s5_track2/local/get_ref_perspeaker_persession_file.py
index 6b00e29e6b1..a4394984876 100755
--- a/egs/chime6/s5_track2/local/get_ref_perspeaker_persession_file.py
+++ b/egs/chime6/s5_track2/local/get_ref_perspeaker_persession_file.py
@@ -55,14 +55,21 @@ def main():
spkrid_mapping[sessionid_speakerid.split('_')[1]]) + '_comb'
combined_ref_writer = open(combined_ref_file, 'w')
utterances = sessionid_speakerid_dict[sessionid_speakerid]
- text = ''
- uttid_wc = 'utt'
+ sessionid_speakerid_utterances = {}
+ # sorting utterances by start and end time
for line in utterances:
parts = line.strip().split()
+ utt_parts = parts[0].strip().split('-')
+ time ='-'.join(utt_parts[1:])
+ sessionid_speakerid_utterances[time] = line
+ text = ''
+ uttid_wc = 'utt'
+ for time_key in sorted(sessionid_speakerid_utterances):
+ parts = sessionid_speakerid_utterances[time_key].strip().split()
uttid_id = parts[0]
utt_text = ' '.join(parts[1:])
text = text + ' ' + ' '.join(parts[1:])
- ref_writer.write(line)
+ ref_writer.write(sessionid_speakerid_utterances[time_key])
length = str(len(utt_text.split()))
uttid_id_len = uttid_id + ":" + length
uttid_wc = uttid_wc + ' ' + uttid_id_len
diff --git a/egs/chime6/s5_track2/local/install_dscore.sh b/egs/chime6/s5_track2/local/install_dscore.sh
new file mode 100755
index 00000000000..314f86f938e
--- /dev/null
+++ b/egs/chime6/s5_track2/local/install_dscore.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+# Installs dscore
+git clone https://github.com/nryant/dscore.git
+pip3 install intervaltree --user
+pip3 install tabulate --user
+pip3 install munkres --user
+pip3 install pytest --user
diff --git a/egs/chime6/s5_track2/local/multispeaker_score.sh b/egs/chime6/s5_track2/local/multispeaker_score.sh
index 74e089c4052..c7075d6cf14 100755
--- a/egs/chime6/s5_track2/local/multispeaker_score.sh
+++ b/egs/chime6/s5_track2/local/multispeaker_score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Ashish Arora, Yusuke Fujita
# Apache 2.0.
# This script takes a reference and hypothesis text file, and performs
diff --git a/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats.sh b/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats.sh
index cb8fe2e6326..6b5ccd466c3 100755
--- a/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats.sh
+++ b/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats_for_egs.sh
index dcdbe1b1593..326b6dbb9fa 100755
--- a/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats_for_egs.sh
+++ b/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/chime6/s5_track2/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/chime6/s5_track2/local/nnet3/xvector/tuning/run_xvector_1a.sh
index 94fc7e7682f..2189e406a7e 100755
--- a/egs/chime6/s5_track2/local/nnet3/xvector/tuning/run_xvector_1a.sh
+++ b/egs/chime6/s5_track2/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 David Snyder
# 2018 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2018 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime6/s5_track2/local/prepare_data.sh b/egs/chime6/s5_track2/local/prepare_data.sh
index c6b8121dab0..8bd2530d6db 100755
--- a/egs/chime6/s5_track2/local/prepare_data.sh
+++ b/egs/chime6/s5_track2/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe, Yenda Trmal)
# Apache 2.0
diff --git a/egs/chime6/s5_track2/local/print_dset_error.py b/egs/chime6/s5_track2/local/print_dset_error.py
index 1a7fd4ff365..8ffe930f4f6 100755
--- a/egs/chime6/s5_track2/local/print_dset_error.py
+++ b/egs/chime6/s5_track2/local/print_dset_error.py
@@ -30,6 +30,6 @@
for arrayid in sorted(array_id_error_dict):
wer = float(array_id_error_dict[arrayid][1])/float(array_id_error_dict[arrayid][0])*100
- wer_detail = "%WER {0:5.2f} [ {1} / {2}, {3} ins, {4} del, {5} sub ]".format(wer, array_id_error_dict[arrayid][0], array_id_error_dict[arrayid][1], array_id_error_dict[arrayid][2], array_id_error_dict[arrayid][3], array_id_error_dict[arrayid][4])
+ wer_detail = "%WER {0:5.2f} [ {1} / {2}, {3} ins, {4} del, {5} sub ]".format(wer, array_id_error_dict[arrayid][1], array_id_error_dict[arrayid][0], array_id_error_dict[arrayid][2], array_id_error_dict[arrayid][3], array_id_error_dict[arrayid][4])
output.write(arrayid + ' ' + wer_detail + '\n')
diff --git a/egs/chime6/s5_track2/local/score_for_submit.sh b/egs/chime6/s5_track2/local/score_for_submit.sh
index 29dfac529b4..71a3a4dd607 100755
--- a/egs/chime6/s5_track2/local/score_for_submit.sh
+++ b/egs/chime6/s5_track2/local/score_for_submit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Apache 2.0
#
# This script provides CHiME-6 challenge track 2 submission scores.
@@ -56,12 +56,19 @@ if [ $stage -le 2 ]; then
| utils/best_wer.sh >& $dev_decodedir/scoring_kaldi_multispeaker/best_wer
best_wer_file=$(awk '{print $NF}' $dev_decodedir/scoring_kaldi_multispeaker/best_wer)
+ best_array=$(echo $best_wer_file | awk -F: '{N=NF; print $N}')
best_lmwt=$(echo $best_wer_file | awk -F/ '{N=NF-2; print $N}')
best_wip=$(echo $best_wer_file | awk -F_ '{N=NF-3; print $N}' | awk -F/ '{N=NF-2; print $N}')
-fi
-echo "best LM weight: $best_lmwt"
-echo "best insertion penalty weight: $best_wip"
+ # printing and storing best lmwt, best_array and wip
+ echo "best array: $best_array"
+ echo "best LM weight: $best_lmwt"
+ echo "best insertion penalty weight: $best_wip"
+
+ echo $best_lmwt > $dev_decodedir/scoring_kaldi_multispeaker/lmwt
+ echo $best_wip > $dev_decodedir/scoring_kaldi_multispeaker/wip
+ echo $best_array > $dev_decodedir/scoring_kaldi_multispeaker/best_array
+fi
if [ $stage -le 3 ]; then
# obtaining per utterance stats for dev
@@ -80,11 +87,16 @@ if [ $stage -le 4 ]; then
fi
if [ $stage -le 5 ]; then
- # storing best lmwt and wip and printing best wer for dev and eval
- echo $best_lmwt > $dev_decodedir/scoring_kaldi_multispeaker/lmwt
- echo $best_wip > $dev_decodedir/scoring_kaldi_multispeaker/wip
+ # obtaining eval wer corresponding to best lmwt, best_array and wip of dev
+ best_array="$(cat $dev_decodedir/scoring_kaldi_multispeaker/best_array)"
+ best_lmwt="$(cat $dev_decodedir/scoring_kaldi_multispeaker/lmwt)"
+ best_wip="$(cat $dev_decodedir/scoring_kaldi_multispeaker/wip)"
+
+ grep WER $eval_decodedir/scoring_kaldi_multispeaker/penalty_$best_wip/$best_lmwt/per_speaker_wer/array_wer.txt /dev/null \
+ | grep $best_array | utils/best_wer.sh >& $eval_decodedir/scoring_kaldi_multispeaker/best_wer
- echo "$(<$dev_decodedir/scoring_kaldi_multispeaker/penalty_$best_wip/$best_lmwt/per_speaker_wer/array_wer.txt)"
- echo "$(<$eval_decodedir/scoring_kaldi_multispeaker/penalty_$best_wip/$best_lmwt/per_speaker_wer/array_wer.txt)"
+ # printing dev and eval wer
+ echo "Dev: $(<$dev_decodedir/scoring_kaldi_multispeaker/best_wer)" | cut -d " " -f 1-15
+ echo "Eval: $(<$eval_decodedir/scoring_kaldi_multispeaker/best_wer)" | cut -d " " -f 1-14
fi
diff --git a/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh b/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh
index 91d52b39269..c9719d472f3 100755
--- a/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh
+++ b/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-17 Vimal Manohar
# 2017 Nagendra Kumar Goel
diff --git a/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh b/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh
index 5701424869a..7ea39f45639 100755
--- a/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh
+++ b/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2018 Vimal Manohar
diff --git a/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh b/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh
index bb985462f49..83bcd587d88 100755
--- a/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh
+++ b/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2018 Vimal Manohar
diff --git a/egs/chime6/s5_track2/local/train_diarizer.sh b/egs/chime6/s5_track2/local/train_diarizer.sh
index 71918e7cabc..845ac7840d5 100755
--- a/egs/chime6/s5_track2/local/train_diarizer.sh
+++ b/egs/chime6/s5_track2/local/train_diarizer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright
# 2019 David Snyder
# Apache 2.0.
diff --git a/egs/chime6/s5_track2/local/train_sad.sh b/egs/chime6/s5_track2/local/train_sad.sh
index e12a0cad694..cbaf3dfc5de 100755
--- a/egs/chime6/s5_track2/local/train_sad.sh
+++ b/egs/chime6/s5_track2/local/train_sad.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2017 Vimal Manohar
diff --git a/egs/chime6/s5_track2/local/truncate_rttm.py b/egs/chime6/s5_track2/local/truncate_rttm.py
new file mode 100755
index 00000000000..3de0c0a60d6
--- /dev/null
+++ b/egs/chime6/s5_track2/local/truncate_rttm.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# Apache 2.0
+# This script truncates the rttm file
+# using UEM file and writes it to a new rttm file
+#
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+from scorelib.turn import trim_turns
+import scorelib.rttm as rttm_func
+from scorelib.uem import load_uem
+
+def get_args():
+ parser = argparse.ArgumentParser(
+ description="""This script truncates the rttm file
+ using UEM file""")
+ parser.add_argument("rttm_file", type=str,
+ help="""Input RTTM file.
+ The format of the RTTM file is
+ """
+ """ """)
+ parser.add_argument("uem_file", type=str,
+ help="""Input UEM file.
+ The format of the UEM file is
+ """)
+ parser.add_argument("rttm_file_write", type=str,
+ help="""output RTTM file.""")
+ args = parser.parse_args()
+ return args
+
+
+if __name__ == '__main__':
+ args = get_args()
+ rttm_writer = open(args.rttm_file_write, 'w')
+ turns, speaker_ids, file_ids = rttm_func.load_rttm(args.rttm_file)
+ loaded_uem = load_uem(args.uem_file)
+ truncated_turns = trim_turns(turns, loaded_uem)
+ rttm_func.write_rttm(args.rttm_file_write,truncated_turns)
diff --git a/egs/chime6/s5_track2/local/uem_file b/egs/chime6/s5_track2/local/uem_file
new file mode 100644
index 00000000000..c1d4dbcd5d4
--- /dev/null
+++ b/egs/chime6/s5_track2/local/uem_file
@@ -0,0 +1,20 @@
+S01_U01 1 0 12000
+S02_U01 1 75 12000
+S09_U01 1 64 12000
+S21_U01 1 59 12000
+S01_U02 1 0 12000
+S02_U02 1 75 12000
+S09_U02 1 64 12000
+S21_U02 1 59 12000
+S01_U03 1 0 12000
+S02_U03 1 75 12000
+S09_U03 1 64 12000
+S21_U03 1 59 12000
+S01_U04 1 0 12000
+S02_U04 1 75 12000
+S09_U04 1 64 12000
+S21_U04 1 59 12000
+S01_U06 1 0 12000
+S02_U06 1 75 12000
+S09_U06 1 64 12000
+S21_U06 1 59 12000
diff --git a/egs/chime6/s5_track2/path.sh b/egs/chime6/s5_track2/path.sh
index c2526194bee..2f4e4e4fb21 100644
--- a/egs/chime6/s5_track2/path.sh
+++ b/egs/chime6/s5_track2/path.sh
@@ -1,6 +1,8 @@
export KALDI_ROOT=`pwd`/../../..
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
+export PATH=$PWD/dscore:$PATH
+export PYTHONPATH="${PYTHONPATH}:$PWD/dscore"
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. $KALDI_ROOT/tools/config/common_path.sh
export LC_ALL=C
diff --git a/egs/chime6/s5_track2/run.sh b/egs/chime6/s5_track2/run.sh
index 1350b8e14d5..d5548518287 100755
--- a/egs/chime6/s5_track2/run.sh
+++ b/egs/chime6/s5_track2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Chime-6 Track 2 baseline. Based mostly on the Chime-5 recipe, with the exception
# that we are required to perform speech activity detection and speaker
@@ -16,7 +16,7 @@ stage=0
nnet_stage=-10
sad_stage=0
diarizer_stage=0
-decode_stage=1
+decode_stage=0
enhancement=beamformit # for a new enhancement method,
# change this variable and decode stage
decode_only=false
@@ -111,8 +111,12 @@ if [ $stage -le 4 ]; then
utils/copy_data_dir.sh data/train_worn data/train_worn_org # back up
grep -v -e "^P11_S03" -e "^P52_S19" -e "^P53_S24" -e "^P54_S24" data/train_worn_org/text > data/train_worn/text
utils/fix_data_dir.sh data/train_worn
-fi
+ # Remove S12_U05 from training data since it has known issues
+ utils/copy_data_dir.sh data/train_u05 data/train_u05_org # back up
+ grep -v -e "^S12_U05" data/train_u05_org/text > data/train_u05/text
+ utils/fix_data_dir.sh data/train_u05
+fi
#########################################################################################
# In stages 5 and 6, we augment and fix train data for our training purpose. point source
diff --git a/egs/cifar/v1/image/copy_data_dir.sh b/egs/cifar/v1/image/copy_data_dir.sh
index c923f5cc07a..dd9fdbc086a 100755
--- a/egs/cifar/v1/image/copy_data_dir.sh
+++ b/egs/cifar/v1/image/copy_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (author: Daniel Povey)
# Apache 2.0
diff --git a/egs/cifar/v1/image/fix_data_dir.sh b/egs/cifar/v1/image/fix_data_dir.sh
index b85623b6e85..20f3de5dec6 100755
--- a/egs/cifar/v1/image/fix_data_dir.sh
+++ b/egs/cifar/v1/image/fix_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script makes sure that only the segments present in
# all of "feats.scp", "images.scp" [if present], segments [if present]
diff --git a/egs/cifar/v1/image/validate_data_dir.sh b/egs/cifar/v1/image/validate_data_dir.sh
index e4db9c2c92c..bf56c17632a 100755
--- a/egs/cifar/v1/image/validate_data_dir.sh
+++ b/egs/cifar/v1/image/validate_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
no_feats=false
diff --git a/egs/cifar/v1/local/nnet3/compare.sh b/egs/cifar/v1/local/nnet3/compare.sh
index c5208c38ac0..8524efe9f60 100755
--- a/egs/cifar/v1/local/nnet3/compare.sh
+++ b/egs/cifar/v1/local/nnet3/compare.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing trained models between systems.
# e.g. local/nnet3/compare.sh exp/resnet1{b,c}_cifar10
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1a.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1a.sh
index 3854bf24d82..94565e6588a 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1a.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# note: the final 'valid accuracy' (0.69) is actually the test accuracy.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1b.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1b.sh
index 907682454b9..6929eff139e 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1b.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1b is like 1a but a smaller model.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1c.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1c.sh
index 6cb94df49a0..c617145104e 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1c.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1c uses dropout with fewer but larger layers
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1d.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1d.sh
index 6baad31fcbb..864f2cda711 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1d.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1d is as 1c but adding batch-norm to all convolutional layers.
# batch-norm helps (0.78 -> 0.8).
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1e.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1e.sh
index a4dbc949d56..4c30016f656 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1e.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1e is as 1d but making the time subsampling symmetric with the
# height subsampling (unfortunately this symmetry is not very visible
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1a.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1a.sh
index 1e3a6e10760..1f77a874ee7 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1a.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# aug_1a is as 1a but with data augmentation
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1b.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1b.sh
index 8e5f83ea2d5..2ae51a6320a 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1b.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_cnn_aug_1b is the same as run_cnn_1e but with data augmentation.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1c.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1c.sh
index 184ea0fa306..28e3006bded 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1c.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# aug_1c is the same as aug_1b but with many more epochs and smaller
# final learning rate
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1d.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1d.sh
index 1eb448149ba..5c7a89113e6 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1d.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1d is as 1c but setting num-minibatches-history=40.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1a.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1a.sh
index 8f41bb96c07..f5fc96a10f4 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1a.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_resnet_1a.sh is a quite well-performing resnet.
# It includes a form of shrinkage that approximates l2 regularization.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1b.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1b.sh
index f8f3b563e6c..3d4d4bfc3e3 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1b.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1b is as 1a but using more epochs: 100 instead of 60.
# This helps a bit.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1c.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1c.sh
index 0708b3d6eaa..34c487e4d00 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1c.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1c is as 1b but setting num-minibatches-history=40.0 in the configs,
# so the Fisher matrix estimates change less fast.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1d.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1d.sh
index 2d1ba279284..635dba70800 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1d.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1d is as 1c but adding rotation in image augmentation.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1e.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1e.sh
index 0b6bd5ce2a9..35615985621 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1e.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1e is as 1d but with more filters and epochs.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1f.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1f.sh
index 3bb3316441a..5f9fe9d692a 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1f.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1f.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1f is as 1e but with l2-regularize instead of proportional shrink
diff --git a/egs/cifar/v1/local/prepare_data.sh b/egs/cifar/v1/local/prepare_data.sh
index f73cbe41e3c..6eb44668217 100755
--- a/egs/cifar/v1/local/prepare_data.sh
+++ b/egs/cifar/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (author: Hossein Hadian)
# Apache 2.0
diff --git a/egs/cifar/v1/run.sh b/egs/cifar/v1/run.sh
index 084a8a53041..a180920ac4d 100755
--- a/egs/cifar/v1/run.sh
+++ b/egs/cifar/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/cmu_cslu_kids/README b/egs/cmu_cslu_kids/README
new file mode 100644
index 00000000000..0b8512e2487
--- /dev/null
+++ b/egs/cmu_cslu_kids/README
@@ -0,0 +1,21 @@
+This is an ASR recipe for children speech using cmu_kids and cslu_kids.
+Both of the corpora can be found on LDC:
+ - cmu_kids : https://catalog.ldc.upenn.edu/LDC97S63
+ - cslu_kids: https://catalog.ldc.upenn.edu/LDC2007S18
+
+To run this recipe, you'll need a copy of both corpora:
+ ./run.sh --cmu_kids --cslu_kids
+
+By default, this recipe will download an LM pretrained on LibriSpeech from
+lm_url=www.openslr.org/resources/11. If you already have a copy of this LM
+and do not wish to redownload, you can specify the LM path using the --lm_src option:
+ ./run.sh --cmu_kids --cslu_kids \
+ --lm_src
+
+This recipe will also download and clean CMU_Dict by default. If you have a clean copy
+already, or wish to use your own dictionary, simply copy your version of the dict to
+ data/local/dict
+
+To run extra features for triphone models or VTLN, set the following options true:
+ ./run.sh --cmu_kids --cslu_kids \
+ --vtln true --extra_features true
diff --git a/egs/cmu_cslu_kids/s5/cmd.sh b/egs/cmu_cslu_kids/s5/cmd.sh
new file mode 100644
index 00000000000..179307556d5
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/cmd.sh
@@ -0,0 +1,23 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd=queue.pl
+export decode_cmd="queue.pl --mem 2G"
+# the use of cuda_cmd is deprecated, used only in 'nnet1',
+export cuda_cmd="queue.pl --gpu 1"
+
+if [[ "$(hostname -f)" == "*.fit.vutbr.cz" ]]; then
+ queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+ export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+ export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+ export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
diff --git a/egs/cmu_cslu_kids/s5/conf/decode.config b/egs/cmu_cslu_kids/s5/conf/decode.config
new file mode 100644
index 00000000000..10b0eee900b
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/decode.config
@@ -0,0 +1,4 @@
+# Use wider-than-normal decoding beams for RM.
+first_beam=16.0
+beam=20.0
+lattice_beam=10.0
diff --git a/egs/cmu_cslu_kids/s5/conf/decode_dnn.config b/egs/cmu_cslu_kids/s5/conf/decode_dnn.config
new file mode 100644
index 00000000000..e7cfca74763
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/decode_dnn.config
@@ -0,0 +1,8 @@
+# In RM, the optimal decode LMWT is in range 2..5, which is different from usual 10..15
+# (it is caused by using simple rule-based LM, instead of n-gram LM),
+scoring_opts="--min-lmwt 2 --max-lmwt 10"
+# Still, it is better to use --acwt 0.1, both for decoding and sMBR,
+acwt=0.1
+# For this small task we can afford to have large beams,
+beam=30.0 # beam for decoding. Was 13.0 in the scripts.
+lattice_beam=18.0 # this has most effect on size of the lattices.
diff --git a/egs/cmu_cslu_kids/s5/conf/mfcc.conf b/egs/cmu_cslu_kids/s5/conf/mfcc.conf
new file mode 100644
index 00000000000..6bbcb763153
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/mfcc.conf
@@ -0,0 +1,2 @@
+--use-energy=false # only non-default option.
+--allow_downsample=true
diff --git a/egs/cmu_cslu_kids/s5/conf/mfcc_hires.conf b/egs/cmu_cslu_kids/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..40f95e97010
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/mfcc_hires.conf
@@ -0,0 +1,11 @@
+# config for high-resolution MFCC features, intended for neural network training
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
+ # there might be some information at the low end.
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
+--allow-downsample=true
diff --git a/egs/cmu_cslu_kids/s5/conf/online_cmvn.conf b/egs/cmu_cslu_kids/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/cmu_cslu_kids/s5/conf/plp.conf b/egs/cmu_cslu_kids/s5/conf/plp.conf
new file mode 100644
index 00000000000..e7e8a9e14af
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/plp.conf
@@ -0,0 +1,2 @@
+# No non-default options for now.
+--allow_downsample=true
diff --git a/egs/cmu_cslu_kids/s5/local/chain/compare_wer.sh b/egs/cmu_cslu_kids/s5/local/chain/compare_wer.sh
new file mode 100755
index 00000000000..411d2691bb9
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/chain/compare_wer.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
+# For use with discriminatively trained systems you specify the epochs after a colon:
+# for instance,
+# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}
+
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: [--looped] [--online] [ ... ]"
+ echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
+ echo "or (with epoch numbers for discriminative training):"
+ echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
+ exit 1
+fi
+
+echo "# $0 $*"
+
+include_looped=false
+if [ "$1" == "--looped" ]; then
+ include_looped=true
+ shift
+fi
+include_online=false
+if [ "$1" == "--online" ]; then
+ include_online=true
+ shift
+fi
+
+
+used_epochs=false
+
+# this function set_names is used to separate the epoch-related parts of the name
+# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like:
+# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
+# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
+# If called with something like:
+# set_names exp/chain/tdnn_d_sp_smbr:3
+# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"
+
+
+set_names() {
+ if [ $# != 1 ]; then
+ echo "compare_wer_general.sh: internal error"
+ exit 1 # exit the program
+ fi
+ dirname=$(echo $1 | cut -d: -f1)
+ epoch=$(echo $1 | cut -s -d: -f2)
+ if [ -z $epoch ]; then
+ epoch_infix=""
+ else
+ used_epochs=true
+ epoch_infix=_epoch${epoch}
+ fi
+}
+
+
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+strings=(
+ "#WER dev_clean_2 (tgsmall) "
+ "#WER dev_clean_2 (tglarge) ")
+
+for n in 0 1; do
+ echo -n "${strings[$n]}"
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2)
+
+ wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ if $include_looped; then
+ echo -n "# [looped:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+ if $include_online; then
+ echo -n "# [online:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+done
+
+
+if $used_epochs; then
+ exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi
+
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (xent)"
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (xent)"
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Num-params "
+for x in $*; do
+ printf "% 10s" $(grep num-parameters $x/log/progress.1.log | awk '{print $2}')
+done
+echo
diff --git a/egs/cmu_cslu_kids/s5/local/chain/run_tdnnf.sh b/egs/cmu_cslu_kids/s5/local/chain/run_tdnnf.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/chain/run_tdnnf.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/cmu_cslu_kids/s5/local/chain/tdnnf_decode.sh b/egs/cmu_cslu_kids/s5/local/chain/tdnnf_decode.sh
new file mode 100755
index 00000000000..8d124193584
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/chain/tdnnf_decode.sh
@@ -0,0 +1,82 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Decode on new data set using trained model.
+# The data directory should be prepared in kaldi style.
+# Usage:
+# ./local/chain/tdnnF_decode.sh --data_src
+
+set -euo pipefail
+echo "$0 $@"
+
+stage=0
+decode_nj=10
+data_src=
+affix=
+tree_affix=
+nnet3_affix=
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat </dev/null || true
+
+ (
+ nspk=$(wc -l <$data_hires/spk2utt)
+ steps/nnet3/decode.sh \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nspk --cmd "$decode_cmd" --num-threads 4 \
+ --online-ivector-dir $ivect_dir \
+ $tree_dir/graph_tgsmall $data_hires ${dir}/decode_tgsmall_$data_name || exit 1
+
+ steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
+ data/lang_test_{tgsmall,tglarge} \
+ $data_hires ${dir}/decode_{tgsmall,tglarge}_$data_name || exit 1
+ ) || touch $dir/.error &
+
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
diff --git a/egs/cmu_cslu_kids/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/cmu_cslu_kids/s5/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..ca08fd4ada8
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,279 @@
+#!/usr/bin/env bash
+
+# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
+# 2017-2018 Yiming Wang
+# 2019 Fei Wu
+
+# Based on material recipe for low-resource languages
+# Factored TDNN with skip connectiong and splicing (two bottle neck layers)
+
+# WER results on dev
+# Model LM Corpus WER(%)
+# tdnn_1a tg_large Combined 11.72
+# tdnn_1a tg_small Combined 13.61
+# tdnn_1a tg_large CMU_Kids 17.26
+# tdnn_1a tg_small CMU_Kids 26.43
+# tdnn_1a tg_large CSLU_Kids 10.80
+# tdnn_1a tg_small CSLU_Kids 12.50
+
+# steps/info/chain_dir_info.pl exp/chain/tdnn1a_sp
+# exp/chain/tdnn1a_sp/: num-iters=342 nj=2..5 num-params=17.9M dim=40+100->3192 combine=-0.042->-0.041 (over 8) xent:train/valid[227,341,final]=(-0.451,-0.363,-0.346/-0.524,-0.466,-0.434) logprob:train/valid[227,341,final]=(-0.047,-0.043,-0.042/-0.058,-0.056,-0.054)
+
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=10
+train_set=train
+test_sets="test"
+gmm=tri3
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1a
+tree_affix=
+train_stage=-10
+get_egs_stage=-10
+decode_iter=
+
+# training chunk-options
+chunk_width=140,100,160
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+common_egs_dir=
+xent_regularize=0.1
+
+# training options
+srand=0
+remove_egs=true
+reporting_email=
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 8 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+
+if [ $stage -le 10 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those. The num-leaves is always somewhat less than the num-leaves from
+ # the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh \
+ --frame-subsampling-factor 3 \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" 3500 ${lores_train_data_dir} \
+ $lang $ali_dir $tree_dir
+fi
+
+if [ $stage -le 11 ]; then
+ mkdir -p $dir
+ echo "$0: creating neural net configs using the xconfig parser";
+
+ num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+ learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+ opts="l2-regularize=0.004 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ linear_opts="orthonormal-constraint=-1.0 l2-regularize=0.004"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-dropout-layer name=tdnn1 $opts dim=1024
+ linear-component name=tdnn2l0 dim=256 $linear_opts input=Append(-1,0)
+ linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0)
+ relu-batchnorm-dropout-layer name=tdnn2 $opts input=Append(0,1) dim=1024
+ linear-component name=tdnn3l dim=256 $linear_opts input=Append(-1,0)
+ relu-batchnorm-dropout-layer name=tdnn3 $opts dim=1024 input=Append(0,1)
+ linear-component name=tdnn4l0 dim=256 $linear_opts input=Append(-1,0)
+ linear-component name=tdnn4l dim=256 $linear_opts input=Append(0,1)
+ relu-batchnorm-dropout-layer name=tdnn4 $opts input=Append(0,1) dim=1024
+ linear-component name=tdnn5l dim=256 $linear_opts
+ relu-batchnorm-dropout-layer name=tdnn5 $opts dim=1024 input=Append(0, tdnn3l)
+ linear-component name=tdnn6l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0)
+ relu-batchnorm-dropout-layer name=tdnn6 $opts input=Append(0,3) dim=1280
+ linear-component name=tdnn7l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn7l dim=256 $linear_opts input=Append(0,3)
+ relu-batchnorm-dropout-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1024
+ linear-component name=tdnn8l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn8l dim=256 $linear_opts input=Append(0,3)
+ relu-batchnorm-dropout-layer name=tdnn8 $opts input=Append(0,3) dim=1280
+ linear-component name=tdnn9l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn9l dim=256 $linear_opts input=Append(-3,0)
+ relu-batchnorm-dropout-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn5l) dim=1024
+ linear-component name=tdnn10l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn10l dim=256 $linear_opts input=Append(0,3)
+ relu-batchnorm-dropout-layer name=tdnn10 $opts input=Append(0,3) dim=1280
+ linear-component name=tdnn11l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn11l dim=256 $linear_opts input=Append(-3,0)
+ relu-batchnorm-dropout-layer name=tdnn11 $opts input=Append(0,3,tdnn10l,tdnn9l,tdnn7l) dim=1024
+ linear-component name=prefinal-l dim=256 $linear_opts
+
+ relu-batchnorm-layer name=prefinal-chain input=prefinal-l $opts dim=1280
+ linear-component name=prefinal-chain-l dim=256 $linear_opts
+ batchnorm-component name=prefinal-chain-batchnorm
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ relu-batchnorm-layer name=prefinal-xent input=prefinal-l $opts dim=1280
+ linear-component name=prefinal-xent-l dim=256 $linear_opts
+ batchnorm-component name=prefinal-xent-batchnorm
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+
+EOF
+
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+
+fi
+
+
+if [ $stage -le 12 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir=$train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.lm-opts="--num-extra-lm-states=2000" \
+ --trainer.dropout-schedule $dropout_schedule \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=20 \
+ --trainer.frames-per-iter=3000000 \
+ --trainer.optimization.num-jobs-initial=2 \
+ --trainer.optimization.num-jobs-final=5 \
+ --trainer.optimization.initial-effective-lrate=0.002 \
+ --trainer.optimization.final-effective-lrate=0.0002 \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --egs.chunk-width=$chunk_width \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir=$train_data_dir \
+ --tree-dir=$tree_dir \
+ --lat-dir=$lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 13 ]; then
+ # Note: it's not important to give mkgraph.sh the lang directory with the
+ # matched topology (since it gets the topology file from the model).
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 data/lang_test_tgsmall \
+ $tree_dir $tree_dir/graph_tgsmall || exit 1;
+fi
+
+if [ $stage -le 14 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l tmp
+ cut -f 3- < tmp > out
+
+ tr '[:lower:]' '[:upper:]' < out > tmp
+ tr -d '[:cntrl:]' < tmp > out
+ sent=$( out
+ tr '[:lower:]' '[:upper:]' < tmp > out
+ trans=$(> $data/$target/utt2spk
+ echo "$uttID $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -f wav -p -c 1 $utt|" >> $data/$target/wav.scp
+ echo "$spkID f" >> $data/$target/spk2gender
+ echo "$uttID $sent" >> $data/$target/text
+ fi
+ done
+ fi
+ fi
+done
+
+for d in $data/train $data/test; do
+ utils/utt2spk_to_spk2utt.pl $d/utt2spk > $d/spk2utt
+ utils/fix_data_dir.sh $d
+done
+
+printf "\t total: %s; train: %s; test: %s.\n" "$total_cnt" "$train_cnt" "$test_cnt"
+rm -f out tmp
+
+# Optional
+# Get data duration, just for book keeping
+# for data in $data/train $data/test; do
+# ./local/data_duration.sh $data
+# done
+#
+
diff --git a/egs/cmu_cslu_kids/s5/local/cslu_aud_prep.sh b/egs/cmu_cslu_kids/s5/local/cslu_aud_prep.sh
new file mode 100755
index 00000000000..735f87eca9f
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/cslu_aud_prep.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Called by local/cslu_prepare_data.sh
+
+Assignment()
+{
+ rnd=$((1+RANDOM % 100))
+ if [ $rnd -le $test_percentage ]; then
+ target="test"
+ else
+ target="train"
+ fi
+}
+audio=
+test_percentage=30 # Percent of data reserved as test set
+debug=debug/cslu_dataprep_debug
+data=data/data_cslu
+. ./utils/parse_options.sh
+
+uttID=$(basename $audio)
+uttID=${uttID%'.wav'}
+sentID=${uttID: -3}
+spkID=${uttID%$sentID}
+sentID=${sentID%"0"}
+sentID=$(echo "$sentID" | tr '[:lower:]' '[:upper:]' )
+
+line=$(grep $sentID cslu/docs/all.map)
+
+if [ -z "$line" ]; then # Can't map utterance to transcript
+ echo $audio $sentID >> $debug
+else
+ txt=$(echo $line | grep -oP '"\K.*?(?=")')
+ cap_txt=${txt^^}
+ Assignment
+ echo "$uttID $cap_txt" >> $data/$target/text
+ echo "$uttID $spkID" >> $data/$target/utt2spk
+ echo "$spkID f" >> $data/$target/spk2gender
+ echo "$uttID $audio" >> $data/$target/wav.scp
+fi
+
diff --git a/egs/cmu_cslu_kids/s5/local/cslu_prepare_data.sh b/egs/cmu_cslu_kids/s5/local/cslu_prepare_data.sh
new file mode 100755
index 00000000000..621179079b3
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/cslu_prepare_data.sh
@@ -0,0 +1,49 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Prepares cslu_kids
+# Should be run from egs/cmu_cslu_kids
+
+set -e
+Looper()
+{
+ # echo "Looping through $1"
+ for f in $1/*; do
+ if [ -d $f ]; then
+ Looper $f
+ else
+ ./local/cslu_aud_prep.sh --data $data --audio $f
+ fi
+ done
+}
+
+data=data/data_cslu
+corpus=cslu
+. ./utils/parse_options.sh
+
+rm -f debug/cslu_dataprep_debug
+mkdir -p debug
+# File check, remove previous data and features files
+for d in $data/test $data/train; do
+ mkdir -p $d
+ ./local/file_check.sh $d
+done
+
+echo "Preparing cslu_kids..."
+Looper $corpus/speech/scripted
+
+for d in $data/test $data/train; do
+ ./utils/utt2spk_to_spk2utt.pl $d
+ ./utils/fix_data_dir.sh $d
+done
+if [ -f debug/cslu_dataprep_debug ]; then
+ echo "Missing transcripts for some utterances. See cslu_dataprep_debug"
+fi
+
+# Optional
+# Get data duration, just for book keeping
+# for data in data/data_cslu/test data/data_cslu/train; do
+# ./local/data_duration.sh $data
+# done
diff --git a/egs/cmu_cslu_kids/s5/local/data_duration.sh b/egs/cmu_cslu_kids/s5/local/data_duration.sh
new file mode 100755
index 00000000000..e838e365ea7
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/data_duration.sh
@@ -0,0 +1,19 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Get duration of the utterance given data dir
+set -eu
+echo $0 $@
+
+data_dir=$1
+mkdir -p duration
+
+./utils/data/get_utt2dur.sh $data_dir
+
+echo "$data_dir"
+python local/sum_duration.py $data_dir/utt2dur
+echo ""
+
+
diff --git a/egs/cmu_cslu_kids/s5/local/download_cmu_dict.sh b/egs/cmu_cslu_kids/s5/local/download_cmu_dict.sh
new file mode 100755
index 00000000000..3f58fd23c93
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/download_cmu_dict.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Copyright 2019 Fei Wu
+set -eu
+# Adapted from the local/prepare_dict script in
+# the librispeech recipe. Download and prepare CMU_dict.
+# For children's speech ASR tasks, since the vocabulary in cmu_kids and
+# cslu_kids is relatively easy compared to librispeech, we use only the
+# CMU_dict, and do not handle OOV with G2P.
+# Should be run from egs/cmu_cslu_kids.
+# Usage:
+# local/download_cmu_dict.sh --dict_dir
+
+dict_dir=data/local/dict
+OOV=""
+
+. ./utils/parse_options.sh || exit 1;
+. ./path.sh || exit 1
+
+if [ ! -d $dict_dir ]; then
+ echo "Downloading and preparing CMU dict"
+ svn co -r 12440 https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict $dict_dir/raw_dict || exit 1;
+
+ echo "Removing the pronunciation variant markers ..."
+ grep -v ';;;' $dict_dir/raw_dict/cmudict.0.7a | \
+ perl -ane 'if(!m:^;;;:){ s:(\S+)\(\d+\) :$1 :; print; }' | \
+ sort -u > $dict_dir/lexicon.txt || exit 1;
+
+ tr -d '\r' < $dict_dir/raw_dict/cmudict.0.7a.symbols > $dict_dir/nonsilence_phones.txt
+
+ echo "$OOV SIL" >> $dict_dir/lexicon.txt
+
+ echo "SIL" > $dict_dir/silence_phones.txt
+ echo "SPN" >> $dict_dir/silence_phones.txt
+ echo "SIL" > $dict_dir/optional_silence.txt
+
+ rm -rf $dict_dir/raw_dict
+fi
diff --git a/egs/cmu_cslu_kids/s5/local/download_lm.sh b/egs/cmu_cslu_kids/s5/local/download_lm.sh
new file mode 100755
index 00000000000..129ca1edbe3
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/download_lm.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 Vassil Panayotov
+# Apache 2.0
+
+if [ $# -ne "2" ]; then
+ echo "Usage: $0 "
+ echo "e.g.: $0 http://www.openslr.org/resources/11 data/local/lm"
+ exit 1
+fi
+
+base_url=$1
+dst_dir=$2
+
+# given a filename returns the corresponding file size in bytes
+# The switch cases below can be autogenerated by entering the data directory and running:
+# for f in *; do echo "\"$f\") echo \"$(du -b $f | awk '{print $1}')\";;"; done
+function filesize() {
+ case $1 in
+ "3-gram.arpa.gz") echo "759636181";;
+ "3-gram.pruned.1e-7.arpa.gz") echo "34094057";;
+ "3-gram.pruned.3e-7.arpa.gz") echo "13654242";;
+ "4-gram.arpa.gz") echo "1355172078";;
+ "g2p-model-5") echo "20098243";;
+ "librispeech-lexicon.txt") echo "5627653";;
+ "librispeech-lm-corpus.tgz") echo "1803499244";;
+ "librispeech-lm-norm.txt.gz") echo "1507274412";;
+ "librispeech-vocab.txt") echo "1737588";;
+ *) echo "";;
+ esac
+}
+
+function check_and_download () {
+ [[ $# -eq 1 ]] || { echo "check_and_download() expects exactly one argument!"; return 1; }
+ fname=$1
+ echo "Downloading file '$fname' into '$dst_dir'..."
+ expect_size="$(filesize $fname)"
+ [[ ! -z "$expect_size" ]] || { echo "Unknown file size for '$fname'"; return 1; }
+ if [[ -s $dst_dir/$fname ]]; then
+ # In the following statement, the first version works on linux, and the part
+ # after '||' works on macOS.
+ f=$dst_dir/$fname
+ fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f)
+ if [[ "$fsize" -eq "$expect_size" ]]; then
+ echo "'$fname' already exists and appears to be complete"
+ return 0
+ else
+ echo "WARNING: '$fname' exists, but the size is wrong - re-downloading ..."
+ fi
+ fi
+ wget --no-check-certificate -O $dst_dir/$fname $base_url/$fname || {
+ echo "Error while trying to download $fname!"
+ return 1
+ }
+ f=$dst_dir/$fname
+ # In the following statement, the first version works on linux, and the part after '||'
+ # works on macOS.
+ fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f)
+ [[ "$fsize" -eq "$expect_size" ]] || { echo "$fname: file size mismatch!"; return 1; }
+ return 0
+}
+
+mkdir -p $dst_dir
+
+for f in 3-gram.arpa.gz 3-gram.pruned.1e-7.arpa.gz 3-gram.pruned.3e-7.arpa.gz 4-gram.arpa.gz \
+ g2p-model-5 librispeech-lm-corpus.tgz librispeech-vocab.txt librispeech-lexicon.txt; do
+ check_and_download $f || exit 1
+done
+
+cd $dst_dir
+ln -sf 3-gram.pruned.1e-7.arpa.gz lm_tgmed.arpa.gz
+ln -sf 3-gram.pruned.3e-7.arpa.gz lm_tgsmall.arpa.gz
+ln -sf 3-gram.arpa.gz lm_tglarge.arpa.gz
+ln -sf 4-gram.arpa.gz lm_fglarge.arpa.gz
+
+exit 0
diff --git a/egs/cmu_cslu_kids/s5/local/file_check.sh b/egs/cmu_cslu_kids/s5/local/file_check.sh
new file mode 100755
index 00000000000..859f228058a
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/file_check.sh
@@ -0,0 +1,17 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+
+printf "\t File Check in folder: %s.\n" "$1"
+
+WavScp="$1/wav.scp"
+Text="$1/text"
+Utt2Spk="$1/utt2spk"
+Gend="$1/utt2gender"
+Spk2Utt="$1/spk2utt"
+rm -f $WavScp $Text $Utt2Spk $Gend $Spk2Utt
+
+
+
diff --git a/egs/cmu_cslu_kids/s5/local/format_lms.sh b/egs/cmu_cslu_kids/s5/local/format_lms.sh
new file mode 100755
index 00000000000..d1a18bada88
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/format_lms.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 Vassil Panayotov
+# Apache 2.0
+
+# Prepares the test time language model(G) transducers
+# (adapted from wsj/s5/local/wsj_format_data.sh)
+
+. ./path.sh || exit 1;
+
+# begin configuration section
+src_dir=data/lang
+# end configuration section
+
+. utils/parse_options.sh || exit 1;
+
+set -e
+
+if [ $# -ne 1 ]; then
+ echo "Usage: $0 "
+ echo "e.g.: $0 /export/a15/vpanayotov/data/lm"
+ echo ", where:"
+ echo " is the directory in which the language model is stored/downloaded"
+ echo "Options:"
+ echo " --src-dir # source lang directory, default data/lang"
+ exit 1
+fi
+
+lm_dir=$1
+
+if [ ! -d $lm_dir ]; then
+ echo "$0: expected source LM directory $lm_dir to exist"
+ exit 1;
+fi
+if [ ! -f $src_dir/words.txt ]; then
+ echo "$0: expected $src_dir/words.txt to exist."
+ exit 1;
+fi
+
+
+tmpdir=data/local/lm_tmp.$$
+trap "rm -r $tmpdir" EXIT
+
+mkdir -p $tmpdir
+
+for lm_suffix in tgsmall tgmed; do
+ # tglarge is prepared by a separate command, called from run.sh; we don't
+ # want to compile G.fst for tglarge, as it takes a while.
+ test=${src_dir}_test_${lm_suffix}
+ mkdir -p $test
+ cp -r ${src_dir}/* $test
+ gunzip -c $lm_dir/lm_${lm_suffix}.arpa.gz | \
+ arpa2fst --disambig-symbol=#0 \
+ --read-symbol-table=$test/words.txt - $test/G.fst
+ utils/validate_lang.pl --skip-determinization-check $test || exit 1;
+done
+
+echo "Succeeded in formatting data."
+
+exit 0
diff --git a/egs/cmu_cslu_kids/s5/local/make_lm.pl b/egs/cmu_cslu_kids/s5/local/make_lm.pl
new file mode 100755
index 00000000000..80eea5a6198
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/make_lm.pl
@@ -0,0 +1,119 @@
+#!/usr/bin/env perl
+
+# Copyright 2010-2011 Yanmin Qian Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This file takes as input the file wp_gram.txt that comes with the RM
+# distribution, and creates the language model as an acceptor in FST form.
+
+# make_rm_lm.pl wp_gram.txt > G.txt
+
+if (@ARGV != 1) {
+ print "usage: make_rm_lm.pl wp_gram.txt > G.txt\n";
+ exit(0);
+}
+unless (open(IN_FILE, "@ARGV[0]")) {
+ die ("can't open @ARGV[0]");
+}
+
+
+$flag = 0;
+$count_wrd = 0;
+$cnt_ends = 0;
+$init = "";
+
+while ($line = )
+{
+ chop($line); # Remove the trailing (newline) char
+
+ $line =~ s/ //g; # Delete all spaces
+
+ if(($line =~ /^>/)) # If line has ">"
+ {
+ if($flag == 0) # Flip flag
+ {
+ $flag = 1;
+ }
+ $line =~ s/>//g; # Delete ">"
+ $hashcnt{$init} = $i;
+ $init = $line;
+ $i = 0;
+ $count_wrd++;
+ @LineArray[$count_wrd - 1] = $init;
+ $hashwrd{$init} = 0;
+ }
+ elsif($flag != 0)
+ {
+
+ $hash{$init}[$i] = $line;
+ $i++;
+ if($line =~ /SENTENCE-END/)
+ {
+ $cnt_ends++;
+ }
+ }
+ else
+ {}
+}
+
+$hashcnt{$init} = $i;
+
+$num = 0;
+$weight = 0;
+$init_wrd = "SENTENCE-END";
+$hashwrd{$init_wrd} = @LineArray;
+for($i = 0; $i < $hashcnt{$init_wrd}; $i++)
+{
+ $weight = -log(1/$hashcnt{$init_wrd});
+ $hashwrd{$hash{$init_wrd}[$i]} = $i + 1;
+ print "0 $hashwrd{$hash{$init_wrd}[$i]} $hash{$init_wrd}[$i] $hash{$init_wrd}[$i] $weight\n";
+}
+$num = $i;
+
+for($i = 0; $i < @LineArray; $i++)
+{
+ if(@LineArray[$i] eq 'SENTENCE-END')
+ {}
+ else
+ {
+ if($hashwrd{@LineArray[$i]} == 0)
+ {
+ $num++;
+ $hashwrd{@LineArray[$i]} = $num;
+ }
+ for($j = 0; $j < $hashcnt{@LineArray[$i]}; $j++)
+ {
+ $weight = -log(1/$hashcnt{@LineArray[$i]});
+ if($hashwrd{$hash{@LineArray[$i]}[$j]} == 0)
+ {
+ $num++;
+ $hashwrd{$hash{@LineArray[$i]}[$j]} = $num;
+ }
+ if($hash{@LineArray[$i]}[$j] eq 'SENTENCE-END')
+ {
+ print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} $weight\n"
+ }
+ else
+ {
+ print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} $hash{@LineArray[$i]}[$j] $hash{@LineArray[$i]}[$j] $weight\n";
+ }
+ }
+ }
+}
+
+print "$hashwrd{$init_wrd} 0\n";
+close(IN_FILE);
+
+
diff --git a/egs/cmu_cslu_kids/s5/local/nnet3/compare_wer.sh b/egs/cmu_cslu_kids/s5/local/nnet3/compare_wer.sh
new file mode 100755
index 00000000000..4888de1f159
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/nnet3/compare_wer.sh
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
+# For use with discriminatively trained systems you specify the epochs after a colon:
+# for instance,
+# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}
+
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]"
+ echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
+ echo "or (with epoch numbers for discriminative training):"
+ echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
+ exit 1
+fi
+
+echo "# $0 $*"
+
+include_looped=false
+if [ "$1" == "--looped" ]; then
+ include_looped=true
+ shift
+fi
+include_online=false
+if [ "$1" == "--online" ]; then
+ include_online=true
+ shift
+fi
+
+
+used_epochs=false
+
+# this function set_names is used to separate the epoch-related parts of the name
+# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like:
+# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
+# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
+# If called with something like:
+# set_names exp/chain/tdnn_d_sp_smbr:3
+# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"
+
+
+set_names() {
+ if [ $# != 1 ]; then
+ echo "compare_wer_general.sh: internal error"
+ exit 1 # exit the program
+ fi
+ dirname=$(echo $1 | cut -d: -f1)
+ epoch=$(echo $1 | cut -s -d: -f2)
+ if [ -z $epoch ]; then
+ epoch_infix=""
+ else
+ used_epochs=true
+ epoch_infix=_epoch${epoch}
+ fi
+}
+
+
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+strings=(
+ "#WER dev_clean_2 (tgsmall) "
+ "#WER dev_clean_2 (tglarge) ")
+
+for n in 0 1; do
+ echo -n "${strings[$n]}"
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2)
+
+ wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ if $include_looped; then
+ echo -n "# [looped:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+ if $include_online; then
+ echo -n "# [online:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+done
+
+
+if $used_epochs; then
+ exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train acc "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid acc "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo
diff --git a/egs/cmu_cslu_kids/s5/local/nnet3/run_ivector_common.sh b/egs/cmu_cslu_kids/s5/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..4a7d3a8913a
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# This script is called from local/nnet3/run_tdnn.sh and
+# local/chain/run_tdnn.sh (and may eventually be called by more
+# scripts). It contains the common feature preparation and
+# iVector-related parts of the script. See those scripts for examples
+# of usage.
+
+stage=0
+train_set=train
+test_sets="test"
+gmm=tri3b
+
+nnet3_affix=
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+gmm_dir=exp/${gmm}
+ali_dir=exp/${gmm}_ali_${train_set}_sp
+
+for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do
+ if [ ! -f $f ]; then
+ echo "$0: expected file $f to exist"
+ exit 1
+ fi
+done
+
+if [ $stage -le 1 ]; then
+ # Although the nnet will be trained by high resolution data, we still have to
+ # perturb the normal data to get the alignment. _sp stands for speed-perturbed.
+ echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+ echo "$0: making MFCC features for low-resolution speed-perturbed data"
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/${train_set}_sp || exit 1;
+ steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1;
+ utils/fix_data_dir.sh data/${train_set}_sp
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
+ data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1
+fi
+
+if [ $stage -le 3 ]; then
+ # Create high-resolution MFCC features (with 40 cepstra instead of 13).
+ # this shows how you can split across multiple file-systems.
+ echo "$0: creating high-resolution MFCC features"
+ mfccdir=data/${train_set}_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/fs0{1,2}/$USER/kaldi-data/mfcc/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1;
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: computing a subset of data to train the diagonal UBM."
+ # We'll use about a quarter of the data.
+ mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
+ temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
+
+ num_utts_total=$(wc -l 2041 combine=-0.47->-0.38 loglike:train/valid[20,31,combined]=(-0.62,-0.38,-0.37/-1.03,-1.03,-1.02) accuracy:train/valid[20,31,combined]=(0.79,0.87,0.87/0.70,0.72,0.72)
+
+# Below, comparing with the chain TDNN system. It's a little better with the
+# small-vocab decoding. Both systems are probably super-badly tuned, and the
+# chain system probably used too many jobs.
+#
+# local/nnet3/compare_wer.sh exp/chain/tdnn1a_sp exp/nnet3/tdnn_lstm1a_sp
+# System tdnn1a_sp tdnn_lstm1a_sp
+#WER dev_clean_2 (tgsmall) 18.43 17.37
+#WER dev_clean_2 (tglarge) 13.15 13.43
+# Final train prob -0.3933
+# Final valid prob -0.9662
+# Final train acc 0.8652
+# Final valid acc 0.7206
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=10
+train_set=train_clean_5
+test_sets=dev_clean_2
+gmm=tri3b
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1a # affix for the TDNN directory name
+train_stage=-10
+get_egs_stage=-10
+decode_iter=
+
+# training options
+# training chunk-options
+chunk_width=40,30,20
+chunk_left_context=40
+chunk_right_context=0
+common_egs_dir=
+xent_regularize=0.1
+
+# training options
+srand=0
+remove_egs=true
+reporting_email=
+
+#decode options
+test_online_decoding=true # if true, it will run the last decoding stage.
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-renorm-layer name=tdnn1 dim=520
+ relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1)
+ fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3
+ relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3
+ relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3
+
+ output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 11 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_rnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir=$train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=6 \
+ --trainer.deriv-truncate-margin=10 \
+ --trainer.samples-per-iter=20000 \
+ --trainer.optimization.num-jobs-initial=1 \
+ --trainer.optimization.num-jobs-final=2 \
+ --trainer.optimization.initial-effective-lrate=0.0003 \
+ --trainer.optimization.final-effective-lrate=0.00003 \
+ --trainer.optimization.shrink-value=0.99 \
+ --trainer.rnn.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.5 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=$chunk_left_context \
+ --egs.chunk-right-context=$chunk_right_context \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir=$train_data_dir \
+ --ali-dir=$ali_dir \
+ --lang=$lang \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l /dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l 2041 combine=-0.71->-0.58 loglike:train/valid[20,31,combined]=(-2.78,-0.95,-0.57/-2.94,-1.31,-0.98) accuracy:train/valid[20,31,combined]=(0.48,0.75,0.81/0.45,0.67,0.71)
+
+# local/nnet3/compare_wer.sh --online exp/nnet3/tdnn_lstm1a_sp exp/nnet3/tdnn_lstm1b_sp
+# System tdnn_lstm1a_sp tdnn_lstm1b_sp
+#WER dev_clean_2 (tgsmall) 17.67 17.01
+# [online:] 18.06 17.26
+#WER dev_clean_2 (tglarge) 13.43 12.63
+# [online:] 13.73 12.94
+# Final train prob -0.3660 -0.5680
+# Final valid prob -1.0236 -0.9771
+# Final train acc 0.8737 0.8067
+# Final valid acc 0.7222 0.7144
+
+
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=10
+train_set=train_clean_5
+test_sets=dev_clean_2
+gmm=tri3b
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1b # affix for the TDNN+LSTM directory name
+train_stage=-10
+get_egs_stage=-10
+decode_iter=
+
+# training options
+# training chunk-options
+chunk_width=40,30,20
+chunk_left_context=40
+chunk_right_context=0
+common_egs_dir=
+xent_regularize=0.1
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+
+# training options
+srand=0
+remove_egs=true
+reporting_email=
+
+#decode options
+test_online_decoding=true # if true, it will run the last decoding stage.
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-renorm-layer name=tdnn1 dim=520
+ relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1)
+ fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+ relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+ relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+
+ output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 11 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_rnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir=$train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=6 \
+ --trainer.deriv-truncate-margin=10 \
+ --trainer.samples-per-iter=20000 \
+ --trainer.optimization.num-jobs-initial=1 \
+ --trainer.optimization.num-jobs-final=2 \
+ --trainer.optimization.initial-effective-lrate=0.0003 \
+ --trainer.optimization.final-effective-lrate=0.00003 \
+ --trainer.optimization.shrink-value=0.99 \
+ --trainer.dropout-schedule="$dropout_schedule" \
+ --trainer.rnn.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.5 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=$chunk_left_context \
+ --egs.chunk-right-context=$chunk_right_context \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir=$train_data_dir \
+ --ali-dir=$ali_dir \
+ --lang=$lang \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l /dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l 2041 combine=-0.99->-0.81 loglike:train/valid[20,31,combined]=(-1.22,-0.69,-0.61/-1.34,-1.02,-0.91) accuracy:train/valid[20,31,combined]=(0.68,0.779,0.800/0.64,0.70,0.724)
+
+
+
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=10
+train_set=train_clean_5
+test_sets=dev_clean_2
+gmm=tri3b
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1c # affix for the TDNN+LSTM directory name
+train_stage=-10
+get_egs_stage=-10
+decode_iter=
+
+# training options
+# training chunk-options
+chunk_width=40,30,20
+chunk_left_context=40
+chunk_right_context=0
+common_egs_dir=
+xent_regularize=0.1
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+
+# training options
+srand=0
+remove_egs=true
+reporting_email=
+
+#decode options
+test_online_decoding=true # if true, it will run the last decoding stage.
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-layer name=tdnn1 dim=520 $tdnn_opts
+ relu-batchnorm-layer name=tdnn2 dim=520 $tdnn_opts input=Append(-1,0,1)
+ fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+ relu-batchnorm-layer name=tdnn3 dim=520 $tdnn_opts input=Append(-3,0,3)
+ relu-batchnorm-layer name=tdnn4 dim=520 $tdnn_opts input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+ relu-batchnorm-layer name=tdnn5 dim=520 $tdnn_opts input=Append(-3,0,3)
+ relu-batchnorm-layer name=tdnn6 dim=520 $tdnn_opts input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+
+ output-layer name=output input=lstm3 $output_opts output-delay=$label_delay dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 11 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_rnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir=$train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=6 \
+ --trainer.deriv-truncate-margin=10 \
+ --trainer.samples-per-iter=20000 \
+ --trainer.optimization.num-jobs-initial=1 \
+ --trainer.optimization.num-jobs-final=2 \
+ --trainer.optimization.initial-effective-lrate=0.0003 \
+ --trainer.optimization.final-effective-lrate=0.00003 \
+ --trainer.dropout-schedule="$dropout_schedule" \
+ --trainer.rnn.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.5 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=$chunk_left_context \
+ --egs.chunk-right-context=$chunk_right_context \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir=$train_data_dir \
+ --ali-dir=$ali_dir \
+ --lang=$lang \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l /dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l data/lang/G.fst || exit 1;
+
+# Checking that G is stochastic [note, it wouldn't be for an Arpa]
+fstisstochastic data/lang/G.fst || echo Error: G is not stochastic
+
+# Checking that G.fst is determinizable.
+fstdeterminize data/lang/G.fst /dev/null || echo Error determinizing G.
+
+# Checking that L_disambig.fst is determinizable.
+fstdeterminize data/lang/L_disambig.fst /dev/null || echo Error determinizing L.
+
+# Checking that disambiguated lexicon times G is determinizable
+fsttablecompose data/lang/L_disambig.fst data/lang/G.fst | \
+ fstdeterminize >/dev/null || echo Error
+
+# Checking that LG is stochastic:
+fsttablecompose data/lang/L.fst data/lang/G.fst | \
+ fstisstochastic || echo Error: LG is not stochastic.
+
+# Checking that L_disambig.G is stochastic:
+fsttablecompose data/lang/L_disambig.fst data/lang/G.fst | \
+ fstisstochastic || echo Error: LG is not stochastic.
+
+echo "Succeeded preparing grammar for CMU_kids."
diff --git a/egs/cmu_cslu_kids/s5/local/score.sh b/egs/cmu_cslu_kids/s5/local/score.sh
new file mode 100755
index 00000000000..cb5bbb7277b
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/score.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# 2014 Guoguo Chen
+# Apache 2.0
+
+[ -f ./path.sh ] && . ./path.sh
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+decode_mbr=true
+word_ins_penalty=0.0,0.5,1.0
+min_lmwt=7
+max_lmwt=17
+iter=final
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+ echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
+ echo " Options:"
+ echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
+ echo " --stage (0|1|2) # start scoring script from part-way through."
+ echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)."
+ echo " --min_lmwt # minumum LM-weight for lattice rescoring "
+ echo " --max_lmwt # maximum LM-weight for lattice rescoring "
+ exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $dir/lat.1.gz $data/text; do
+ [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
+done
+
+mkdir -p $dir/scoring/log
+
+cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
+
+for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \
+ lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+ lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
+ lattice-best-path --word-symbol-table=$symtab \
+ ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1;
+done
+
+# Note: the double level of quoting for the sed command
+for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \
+ cat $dir/scoring/LMWT.$wip.tra \| \
+ utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
+ compute-wer --text --mode=present \
+ ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1;
+done
+
+exit 0;
diff --git a/egs/cmu_cslu_kids/s5/local/sort_result.sh b/egs/cmu_cslu_kids/s5/local/sort_result.sh
new file mode 100755
index 00000000000..aedec9dc344
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/sort_result.sh
@@ -0,0 +1,46 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Sorts and reports results in results/results.txt
+# for all models in exp. Expects decode directories
+# to be named as exp/<mdl>/decode* or exp/chain/tdnn*/decode*
+# Should be run from egs/cmu_cslu_kids.
+
+res=${1:-"results/results.txt"}
+exp=exp
+mkdir -p results
+rm -f $res
+
+echo "Sorting results in: "
+echo "# ---------- GMM-HMM Models ----------" >> $res
+for mdl in $exp/mono* $exp/tri*; do
+ echo " $mdl"
+ if [ -d $mdl ];then
+ for dec in $mdl/decode*;do
+ echo " $dec"
+ if [ -d $dec ];then
+ grep WER $dec/wer* | \
+ sort -k2 -n > $dec/WERs
+ head -n 1 $dec/WERs >> $res
+ fi
+ done
+ fi
+done
+
+echo "# ---------- DNN-HMM Models ----------" >> $res
+# DNN results
+for mdl in $exp/chain/tdnn*; do
+ echo " $mdl"
+ for dec in $mdl/decode*; do
+ if [ -d $dec ]; then
+ echo " $dec"
+ grep WER $dec/wer* | \
+ sort -k2 -n > $dec/WERs
+ head -n 1 $dec/WERs >> $res
+ fi
+ done
+done
+
+sed -i "s/:/ /g" $res
diff --git a/egs/cmu_cslu_kids/s5/local/subset_dataset.sh b/egs/cmu_cslu_kids/s5/local/subset_dataset.sh
new file mode 100755
index 00000000000..f8936b64c97
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/subset_dataset.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Copyright 2017 Luminar Technologies, Inc. (author: Daniel Galvez)
+# Apache 2.0
+
+# The following commands were used to generate the mini_librispeech dataset:
+#
+# Note that data generation is random. This could be fixed by
+# providing a seed argument to the shuf program.
+
+if [ "$#" -ne 3 ]; then
+ echo "Usage: $0 <src-dir> <dest-dir> <dest-num-hours>"
+ echo "e.g.: $0 /export/a05/dgalvez/LibriSpeech/train-clean-100 \\
+ /export/a05/dgalvez/LibriSpeech/train-clean-5 5"
+ exit 1
+fi
+
+src_dir=$1
+dest_dir=$2
+dest_num_hours=$3
+
+src=$(basename $src_dir)
+dest=$(basename $dest_dir)
+librispeech_dir=$(dirname $src_dir)
+
+# TODO: Possibly improve this to ensure gender balance and speaker
+# balance.
+# TODO: Use actual time values instead of assuming that to make sure we get $dest_num_hours of data
+src_num_hours=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | awk -F'|' '{ print $3 }' | \
+python -c '
+from __future__ import print_function
+from sys import stdin
+minutes_str = stdin.read().split()
+print(int(round(sum([float(minutes) for minutes in minutes_str]) / 60.0)))')
+src_num_chapters=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | \
+ awk -F'|' '{ print $1 }' | sort -u | wc -l)
+mkdir -p data/subset_tmp
+grep "$src" $librispeech_dir/CHAPTERS.TXT | \
+ awk -F'|' '{ print $1 }' | \
+ shuf -n $(((dest_num_hours * src_num_chapters) / src_num_hours)) > \
+ data/subset_tmp/${dest}_chapter_id_list.txt
+
+while read -r chapter_id || [[ -n "$chapter_id" ]]; do
+ chapter_dir=$(find $src_dir/ -mindepth 2 -name "$chapter_id" -type d)
+ speaker_id=$(basename $(dirname $chapter_dir))
+ mkdir -p $dest_dir/$speaker_id/
+ cp -r $chapter_dir $dest_dir/$speaker_id/
+done < data/subset_tmp/${dest}_chapter_id_list.txt
diff --git a/egs/cmu_cslu_kids/s5/local/sum_duration.py b/egs/cmu_cslu_kids/s5/local/sum_duration.py
new file mode 100644
index 00000000000..0af7ba62151
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/sum_duration.py
@@ -0,0 +1,15 @@
+# Sum duration obtained by using
+# utils/data/get_utt2dur.sh
+
+import sys
+file = sys.argv[1]
+sum = 0
+with open(file, 'r') as fp:
+ line = fp.readline()
+ while(line):
+ toks = line.strip().split()
+ sum += float(toks[1])
+ line = fp.readline()
+fp.close()
+h=sum/3600
+sys.stdout.write("%f hour data.\n"%h)
diff --git a/egs/cmu_cslu_kids/s5/local/train_lms.sh b/egs/cmu_cslu_kids/s5/local/train_lms.sh
new file mode 100755
index 00000000000..a5aaf415e44
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/train_lms.sh
@@ -0,0 +1,217 @@
+#!/usr/bin/env bash
+
+# This script trains LMs on the WSJ LM-training data.
+# It requires that you have already run wsj_extend_dict.sh,
+# to get the larger-size dictionary including all of CMUdict
+# plus any OOVs and possible acronyms that we could easily
+# derive pronunciations for.
+
+dict_suffix=
+
+echo "$0 $@" # Print the command line for logging
+. utils/parse_options.sh || exit 1;
+
+dir=data/local/local_lm
+srcdir=data/local/dict${dict_suffix}_larger
+mkdir -p $dir
+. ./path.sh || exit 1; # for KALDI_ROOT
+export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH
+( # First make sure the kaldi_lm toolkit is installed.
+ cd $KALDI_ROOT/tools || exit 1;
+ if [ -d kaldi_lm ]; then
+ echo Not installing the kaldi_lm toolkit since it is already there.
+ else
+ echo Downloading and installing the kaldi_lm tools
+ if [ ! -f kaldi_lm.tar.gz ]; then
+ wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
+ fi
+ tar -xvzf kaldi_lm.tar.gz || exit 1;
+ cd kaldi_lm
+ make || exit 1;
+ echo Done making the kaldi_lm tools
+ fi
+) || exit 1;
+
+
+
+if [ ! -f $srcdir/cleaned.gz -o ! -f $srcdir/lexicon.txt ]; then
+ echo "Expecting files $srcdir/cleaned.gz and $srcdir/lexicon.txt to exist";
+ echo "You need to run local/wsj_extend_dict.sh before running this script."
+ exit 1;
+fi
+
+# Get a wordlist-- keep everything but silence, which should not appear in
+# the LM.
+awk '{print $1}' $srcdir/lexicon.txt | grep -v -w '!SIL' > $dir/wordlist.txt
+
+# Get training data with OOV words (w.r.t. our current vocab) replaced with <UNK>.
+echo "Getting training data with OOV words replaced with <UNK> (train_nounk.gz)"
+gunzip -c $srcdir/cleaned.gz | awk -v w=$dir/wordlist.txt \
+ 'BEGIN{while((getline<w)>0) v[$1]=1;}
+ {for (i=1;i<=NF;i++) if ($i in v) printf $i" ";else printf "<UNK> ";print ""}'|sed 's/ $//g' \
+ | gzip -c > $dir/train_nounk.gz
+
+# Get unigram counts (without bos/eos, but this doesn't matter here, it's
+# only to get the word-map, which treats them specially & doesn't need their
+# counts).
+# Add a 1-count for each word in word-list by including that in the data,
+# so all words appear.
+gunzip -c $dir/train_nounk.gz | cat - $dir/wordlist.txt | \
+ awk '{ for(x=1;x<=NF;x++) count[$x]++; } END{for(w in count){print count[w], w;}}' | \
+ sort -nr > $dir/unigram.counts
+
+# Get "mapped" words-- a character encoding of the words that makes the common words very short.
+cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "<s>" "</s>" "<UNK>" > $dir/word_map
+
+gunzip -c $dir/train_nounk.gz | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1]=$2;}
+ { for(n=1;n<=NF;n++) { printf map[$n]; if(n<NF){ printf " "; } else { print ""; }}}' | gzip -c >$dir/train.gz
+
+# To save disk space, remove the un-mapped training data. We could
+# easily generate it again if needed.
+rm $dir/train_nounk.gz
+
+train_lm.sh --arpa --lmtype 3gram-mincount $dir
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826
+# 7.8 million N-grams.
+
+prune_lm.sh --arpa 6.0 $dir/3gram-mincount/
+# 1.45 million N-grams.
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139
+
+train_lm.sh --arpa --lmtype 4gram-mincount $dir
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180
+# 10.3 million N-grams.
+
+prune_lm.sh --arpa 7.0 $dir/4gram-mincount
+# 1.50 million N-grams
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757
+
+
+exit 0
+
+### Below here, this script is showing various commands that
+## were run during LM tuning.
+
+train_lm.sh --arpa --lmtype 3gram-mincount $dir
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826
+# 7.8 million N-grams.
+
+prune_lm.sh --arpa 3.0 $dir/3gram-mincount/
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 156.408740
+# 2.5 million N-grams.
+
+prune_lm.sh --arpa 6.0 $dir/3gram-mincount/
+# 1.45 million N-grams.
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139
+
+train_lm.sh --arpa --lmtype 4gram-mincount $dir
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180
+# 10.3 million N-grams.
+
+prune_lm.sh --arpa 3.0 $dir/4gram-mincount
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 143.206294
+# 2.6 million N-grams.
+
+prune_lm.sh --arpa 4.0 $dir/4gram-mincount
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 146.927717
+# 2.15 million N-grams.
+
+prune_lm.sh --arpa 5.0 $dir/4gram-mincount
+# 1.86 million N-grams
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 150.162023
+
+prune_lm.sh --arpa 7.0 $dir/4gram-mincount
+# 1.50 million N-grams
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757
+
+train_lm.sh --arpa --lmtype 3gram $dir
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 135.692866
+# 20.0 million N-grams
+
+! which ngram-count \
+ && echo "SRILM tools not installed so not doing the comparison" && exit 1;
+
+#################
+# You could finish the script here if you wanted.
+# Below is to show how to do baselines with SRILM.
+# You'd have to install the SRILM toolkit first.
+
+heldout_sent=10000 # Don't change this if you want result to be comparable with
+ # kaldi_lm results
+sdir=$dir/srilm # in case we want to use SRILM to double-check perplexities.
+mkdir -p $sdir
+gunzip -c $srcdir/cleaned.gz | head -$heldout_sent > $sdir/cleaned.heldout
+gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent > $sdir/cleaned.train
+(echo "<s>"; echo "</s>" ) | cat - $dir/wordlist.txt > $sdir/wordlist.final.s
+
+# 3-gram:
+ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \
+ -map-unk "<unk>" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz
+ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/cleaned.heldout # consider -debug 2
+#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs
+#0 zeroprobs, logprob= -491456 ppl= 141.457 ppl1= 177.437
+
+# Trying 4-gram:
+ngram-count -text $sdir/cleaned.train -order 4 -limit-vocab -vocab $sdir/wordlist.final.s -unk \
+ -map-unk "<unk>" -kndiscount -interpolate -lm $sdir/srilm.o4g.kn.gz
+ngram -order 4 -lm $sdir/srilm.o4g.kn.gz -ppl $sdir/cleaned.heldout
+#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs
+#0 zeroprobs, logprob= -480939 ppl= 127.233 ppl1= 158.822
+
+#3-gram with pruning:
+ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \
+ -prune 0.0000001 -map-unk "<unk>" -kndiscount -interpolate -lm $sdir/srilm.o3g.pr7.kn.gz
+ngram -lm $sdir/srilm.o3g.pr7.kn.gz -ppl $sdir/cleaned.heldout
+#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs
+#0 zeroprobs, logprob= -510828 ppl= 171.947 ppl1= 217.616
+# Around 2.25M N-grams.
+# Note: this is closest to the experiment done with "prune_lm.sh --arpa 3.0 $dir/3gram-mincount/"
+# above, which gave 2.5 million N-grams and a perplexity of 156.
+
+# Note: all SRILM experiments above fully discount all singleton 3 and 4-grams.
+# You can use -gt3min=0 and -gt4min=0 to stop this (this will be comparable to
+# the kaldi_lm experiments above without "-mincount".
+
+## From here is how to train with
+# IRSTLM. This is not really working at the moment.
+
+if [ -z $IRSTLM ] ; then
+ export IRSTLM=$KALDI_ROOT/tools/irstlm/
+fi
+export PATH=${PATH}:$IRSTLM/bin
+if ! command -v prune-lm >/dev/null 2>&1 ; then
+ echo "$0: Error: the IRSTLM is not available or compiled" >&2
+ echo "$0: Error: We used to install it by default, but." >&2
+ echo "$0: Error: this is no longer the case." >&2
+ echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
+ echo "$0: Error: and run extras/install_irstlm.sh" >&2
+ exit 1
+fi
+
+idir=$dir/irstlm
+mkdir $idir
+gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent | add-start-end.sh | \
+ gzip -c > $idir/train.gz
+
+dict -i=WSJ.cleaned.irstlm.txt -o=dico -f=y -sort=no
+ cat dico | gawk 'BEGIN{while (getline<"vocab.20k.nooov") v[$1]=1; print "DICTIONARY 0 "length(v);}FNR>1{if ($1 in v)\
+{print $0;}}' > vocab.irstlm.20k
+
+
+build-lm.sh -i "gunzip -c $idir/train.gz" -o $idir/lm_3gram.gz -p yes \
+ -n 3 -s improved-kneser-ney -b yes
+# Testing perplexity with SRILM tools:
+ngram -lm $idir/lm_3gram.gz -ppl $sdir/cleaned.heldout
+#data/local/local_lm/irstlm/lm_3gram.gz: line 162049: warning: non-zero probability for <unk> in closed-vocabulary LM
+#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 0 OOVs
+#0 zeroprobs, logprob= -513670 ppl= 175.041 ppl1= 221.599
+
+# Perplexity is very bad (should be ~141, since we used -p option,
+# not 175),
+# but adding -debug 3 to the command line shows that
+# the IRSTLM LM does not seem to sum to one properly, so it seems that
+# it produces an LM that isn't interpretable in the normal way as an ARPA
+# LM.
+
+
+
diff --git a/egs/cmu_cslu_kids/s5/local/vtln.sh b/egs/cmu_cslu_kids/s5/local/vtln.sh
new file mode 100755
index 00000000000..0f3f0d375d3
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/vtln.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Run VTLN. This will be run if the vtln option
+# is set to be true in run.sh.
+
+set -eu
+stage=0
+featdir=mfcc/vtln
+data=data
+mdl=exp/tri3
+mdl_vtln=${mdl}_vtln
+vtln_lda=exp/tri4
+vtln_sat=exp/tri5
+
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+mkdir -p $featdir
+
+steps/train_lvtln.sh --cmd "$train_cmd" 1800 9000 $data/train $data/lang $mdl $mdl_vtln
+
+if [ $stage -le 0 ]; then
+ mkdir -p $data/train_vtln
+ cp $data/train/* $data/train_vtln || true
+ cp $mdl_vtln/final.warp $data/train_vtln/spk2warp
+ steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" $data/train_vtln exp/make_mfcc/train_vtln $featdir
+ steps/compute_cmvn_stats.sh $data/train_vtln exp/make_mfcc/train_vtln $featdir
+fi
+
+if [ $stage -le 1 ]; then
+ utils/mkgraph.sh $data/lang_test_tgmed $mdl_vtln $mdl_vtln/graph
+ steps/decode_lvtln.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
+ $mdl_vtln/graph $data/test $mdl_vtln/decode
+fi
+
+if [ $stage -le 2 ]; then
+ mkdir -p $data/test_vtln
+ cp $data/test/* $data/test_vtln || true
+ cp $mdl_vtln/decode/final.warp $data/test_vtln/spk2warp
+ steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" $data/test_vtln exp/make_mfcc/test_vtln $featdir
+ steps/compute_cmvn_stats.sh $data/test_vtln exp/make_mfcc/test_vtln $featdir
+fi
+
+if [ $stage -le 3 ]; then
+ steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" 1800 9000 \
+ $data/train_vtln $data/lang $mdl_vtln $vtln_lda
+ utils/mkgraph.sh $data/lang_test_tgmed $vtln_lda $vtln_lda/graph
+ echo "$mdl_vtln + lda + mllt" > $vtln_lda/model_discription
+ steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
+ $vtln_lda/graph $data/test_vtln $vtln_lda/decode
+fi
+
+if [ $stage -le 4 ]; then
+ steps/train_sat.sh 1800 9000 $data/train_vtln $data/lang $vtln_lda $vtln_sat
+ utils/mkgraph.sh $data/lang_test_tgmed $vtln_sat $vtln_sat/graph
+ steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" $vtln_sat/graph $data/test_vtln $vtln_sat/decode
+ echo "$mdl_vtln + lda + mllt + SAT" > $vtln_sat/model_discription
+fi
diff --git a/egs/cmu_cslu_kids/s5/path.sh b/egs/cmu_cslu_kids/s5/path.sh
new file mode 100755
index 00000000000..2d17b17a84a
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/cmu_cslu_kids/s5/run.sh b/egs/cmu_cslu_kids/s5/run.sh
new file mode 100755
index 00000000000..4226201989b
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/run.sh
@@ -0,0 +1,179 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+set -eo
+
+stage=0
+cmu_kids= # path to cmu_kids corpus
+cslu_kids= # path to cslu_kids corpus
+lm_src= # path of existing librispeech lm
+extra_features=false # Extra features for GMM model (MMI, boosting and MPE)
+vtln=false # Optional, run VLTN on gmm and tdnnf models if set true
+email= # Reporting email for tdnn-f training
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+lm_url=www.openslr.org/resources/11
+mkdir -p data
+mkdir -p data/local
+
+# Prepare data
+if [ $stage -le 0 ]; then
+ # Make soft link to the corpora
+ if [ ! -e cmu_kids ]; then
+ if [ ! -d $cmu_kids/kids ]; then echo "ERROR: Expected to find a directory called 'kids' in $cmu_kids. Exiting." && exit 1; fi
+ ln -sf $cmu_kids cmu_kids
+ fi
+ if [ ! -e cslu ]; then
+ if [ ! -d $cslu_kids/speech ]; then echo "ERROR: Expected to find a directory called 'speech' in $cslu_kids. Exiting." && exit 1; fi
+ ln -sf $cslu_kids cslu
+ fi
+
+ # Make softlink to lm, if lm_src provided
+ if [ ! -z "$lm_src" ] && [ ! -e data/local/lm ] ; then
+ ln -sf $lm_src data/local/lm
+ fi
+
+ # Remove old data dirs
+ rm -rf data/data_cmu
+ rm -rf data/data_cslu
+
+ # Data Prep
+ ./local/cmu_prepare_data.sh --corpus cmu_kids/kids --data data/data_cmu
+ ./local/cslu_prepare_data.sh --corpus cslu --data data/data_cslu
+fi
+
+# Combine data
+if [ $stage -le 1 ]; then
+ mkdir -p data/train
+ mkdir -p data/test
+ rm -rf data/train/*
+ rm -rf data/test/*
+ ./utils/combine_data.sh data/train data/data_cmu/train data/data_cslu/train
+ ./utils/combine_data.sh data/test data/data_cmu/test data/data_cslu/test
+fi
+
+# LM, WFST Preparation
+if [ $stage -le 2 ]; then
+ if [ ! -d data/local/dict ]; then
+ ./local/download_cmu_dict.sh
+ fi
+
+ if [ ! -e data/local/lm ]; then
+ echo "lm_src not provided. Downloading lm from openslr."
+ ./local/download_lm.sh $lm_url data/local/lm
+ fi
+
+ utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang
+ local/format_lms.sh --src_dir data/lang data/local/lm
+
+ # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs
+ utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge
+ utils/build_const_arpa_lm.sh data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge
+fi
+
+# Make MFCC features
+if [ $stage -le 3 ]; then
+ mkdir -p mfcc
+ mkdir -p exp
+ steps/make_mfcc.sh --nj 40 --cmd "$train_cmd" data/test exp/make_feat/test mfcc
+ steps/compute_cmvn_stats.sh data/test exp/make_feat/test mfcc
+ steps/make_mfcc.sh --nj 40 --cmd "$train_cmd" data/train exp/make_feat/train mfcc
+ steps/compute_cmvn_stats.sh data/train exp/make_feat/train mfcc
+fi
+
+# Mono-phone
+if [ $stage -le 4 ]; then
+ # Train
+ steps/train_mono.sh --nj 40 --cmd "$train_cmd" data/train data/lang exp/mono
+ #Decode
+ utils/mkgraph.sh data/lang_test_tgsmall exp/mono exp/mono/graph
+ steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/mono/graph data/test exp/mono/decode
+ #Align
+ steps/align_si.sh --nj 20 --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_ali
+fi
+
+# Tri1 [Vanilla tri phone model]
+if [ $stage -le 5 ]; then
+ # Train
+ steps/train_deltas.sh --cmd "$train_cmd" 1800 9000 data/train data/lang exp/mono_ali exp/tri1
+ # Decode
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri1 exp/tri1/graph
+ steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri1/graph data/test exp/tri1/decode
+ # Align - make graph - decode again
+ steps/align_si.sh --nj 20 --cmd "$train_cmd" --use-graphs true data/train data/lang_test_tgmed exp/tri1 exp/tri1_ali
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri1_ali exp/tri1_ali/graph
+ steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri1_ali/graph data/test exp/tri1_ali/decode
+fi
+
+# Add LDA and MLLT
+if [ $stage -le 6 ]; then
+ # Train
+ steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" 1800 9000 data/train data/lang exp/tri1_ali exp/tri2
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri2 exp/tri2/graph
+ # Decode
+ steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2/decode
+ # Align - make graph - decode again
+ steps/align_si.sh --nj 20 --cmd "$train_cmd" --use-graphs true data/train data/lang_test_tgmed exp/tri2 exp/tri2_ali
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri2_ali exp/tri2_ali/graph
+ steps/decode_fmllr.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri2_ali/graph data/test exp/tri2_ali/decode
+fi
+
+# Add other features
+if [ $stage -le 7 ]; then
+ if [ $extra_features = true ]; then
+ # Add MMI
+ steps/make_denlats.sh --nj 20 --cmd "$train_cmd" data/train data/lang exp/tri2 exp/tri2_denlats
+ steps/train_mmi.sh data/train data/lang exp/tri2_ali exp/tri2_denlats exp/tri2_mmi
+ steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi/decode_it4
+ steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi/decode_it3
+
+ # Add Boosting
+ steps/train_mmi.sh --boost 0.05 data/train data/lang exp/tri2_ali exp/tri2_denlats exp/tri2_mmi_b0.05
+ steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi_b0.05/decode_it4
+ steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi_b0.05/decode_it3
+
+ # Add MPE
+ steps/train_mpe.sh data/train data/lang exp/tri2_ali exp/tri2_denlats exp/tri2_mpe
+ steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mpe/decode_it4
+ steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mpe/decode_it3
+ fi
+fi
+
+# Add SAT
+if [ $stage -le 8 ]; then
+ # Do LDA+MLLT+SAT, and decode.
+ steps/train_sat.sh 1800 9000 data/train data/lang exp/tri2_ali exp/tri3
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri3 exp/tri3/graph
+ steps/decode_fmllr.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri3/graph data/test exp/tri3/decode
+fi
+
+if [ $stage -le 9 ]; then
+ # Align all data with LDA+MLLT+SAT system (tri3)
+ steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" --use-graphs true data/train data/lang_test_tgmed exp/tri3 exp/tri3_ali
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri3_ali exp/tri3_ali/graph
+ steps/decode_fmllr.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri3_ali/graph data/test exp/tri3_ali/decode
+fi
+
+if [ $stage -le 10 ]; then
+ # Uncomment reporting email option to get training progress updates by email
+ ./local/chain/run_tdnnf.sh --train_set train \
+ --test_sets test --gmm tri3 # --reporting_email $email
+fi
+
+
+# Optional VTLN. Run if vtln is set to true
+if [ $stage -le 11 ]; then
+ if [ $vtln = true ]; then
+ ./local/vtln.sh
+ ./local/chain/run_tdnnf.sh --nnet3_affix vtln --train_set train_vtln \
+ --test_sets test_vtln --gmm tri5 # --reporting_email $email
+ fi
+fi
+
+# Collect and report WER results for all models
+./local/sort_result.sh
diff --git a/egs/cmu_cslu_kids/s5/steps b/egs/cmu_cslu_kids/s5/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/cmu_cslu_kids/s5/utils b/egs/cmu_cslu_kids/s5/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file
diff --git a/egs/cnceleb/README.txt b/egs/cnceleb/README.txt
new file mode 100644
index 00000000000..db8789839a9
--- /dev/null
+++ b/egs/cnceleb/README.txt
@@ -0,0 +1,9 @@
+
+This directory contains example scripts for CN-Celeb speaker
+verification. The CN-Celeb corpus is required, and can be
+downloaded from Openslr http://www.openslr.org/82/ or from
+CSLT@Tsinghua http://cslt.riit.tsinghua.edu.cn/~data/CN-Celeb/
+
+The subdirectories "v1" and so on are different speaker recognition
+recipes. The recipe in v1 demonstrates a standard approach using a
+full-covariance GMM-UBM, iVectors, and a PLDA backend.
diff --git a/egs/cnceleb/v1/README.txt b/egs/cnceleb/v1/README.txt
new file mode 100644
index 00000000000..dc5086f0b7a
--- /dev/null
+++ b/egs/cnceleb/v1/README.txt
@@ -0,0 +1,4 @@
+
+ This example demonstrates a traditional iVector system based on
+ CN-Celeb dataset.
+
diff --git a/egs/cnceleb/v1/cmd.sh b/egs/cnceleb/v1/cmd.sh
new file mode 100755
index 00000000000..d1ca1a6d126
--- /dev/null
+++ b/egs/cnceleb/v1/cmd.sh
@@ -0,0 +1,15 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl --mem 4G"
+
+
diff --git a/egs/cnceleb/v1/conf/mfcc.conf b/egs/cnceleb/v1/conf/mfcc.conf
new file mode 100644
index 00000000000..649cffb9de8
--- /dev/null
+++ b/egs/cnceleb/v1/conf/mfcc.conf
@@ -0,0 +1,7 @@
+--sample-frequency=16000
+--frame-length=25 # the default is 25
+--low-freq=20 # the default.
+--high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case).
+--num-mel-bins=30
+--num-ceps=24
+--snip-edges=false
diff --git a/egs/cnceleb/v1/conf/vad.conf b/egs/cnceleb/v1/conf/vad.conf
new file mode 100644
index 00000000000..a0ca2449b10
--- /dev/null
+++ b/egs/cnceleb/v1/conf/vad.conf
@@ -0,0 +1,2 @@
+--vad-energy-threshold=5.5
+--vad-energy-mean-scale=0.5
diff --git a/egs/cnceleb/v1/local/make_cnceleb.sh b/egs/cnceleb/v1/local/make_cnceleb.sh
new file mode 100755
index 00000000000..620c0dfe76a
--- /dev/null
+++ b/egs/cnceleb/v1/local/make_cnceleb.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# Copyright 2017 Ignacio Viñals
+# 2017-2018 David Snyder
+# 2019 Jiawen Kang
+#
+# This script prepares the CN-Celeb dataset. It creates separate directories
+# for train, eval enroll and eval test. It also prepares a trials files, in the eval test directory.
+
+if [ $# != 2 ]; then
+ echo "Usage: make_cnceleb.sh <CN-Celeb_PATH> <out_dir>"
+ echo "E.g.: make_cnceleb.sh /export/corpora/CN-Celeb data"
+ exit 1
+fi
+
+in_dir=$1
+out_dir=$2
+
+# Prepare the development data
+this_out_dir=${out_dir}/train
+mkdir -p $this_out_dir 2>/dev/null
+WAVFILE=$this_out_dir/wav.scp
+SPKFILE=$this_out_dir/utt2spk
+rm $WAVFILE $SPKFILE 2>/dev/null
+this_in_dir=${in_dir}/dev
+
+for spkr_id in `cat $this_in_dir/dev.lst`; do
+ for f in $in_dir/data/$spkr_id/*.wav; do
+ wav_id=$(basename $f | sed s:.wav$::)
+ echo "${spkr_id}-${wav_id} $f" >> $WAVFILE
+ echo "${spkr_id}-${wav_id} ${spkr_id}" >> $SPKFILE
+ done
+done
+utils/fix_data_dir.sh $this_out_dir
+
+# Prepare the evaluation data
+for mode in enroll test; do
+ this_out_dir=${out_dir}/eval_${mode}
+ mkdir -p $this_out_dir 2>/dev/null
+ WAVFILE=$this_out_dir/wav.scp
+ SPKFILE=$this_out_dir/utt2spk
+ rm $WAVFILE $SPKFILE 2>/dev/null
+ this_in_dir=${in_dir}/eval/${mode}
+
+ for f in $this_in_dir/*.wav; do
+ wav_id=$(basename $f | sed s:.wav$::)
+ spkr_id=$(echo ${wav_id} | cut -d "-" -f1)
+ echo "${wav_id} $f" >> $WAVFILE
+ echo "${wav_id} ${spkr_id}" >> $SPKFILE
+ done
+ utils/fix_data_dir.sh $this_out_dir
+done
+
+# Prepare test trials
+this_out_dir=$out_dir/eval_test/trials
+mkdir -p $out_dir/eval_test/trials
+this_in_dir=${in_dir}/eval/lists
+cat $this_in_dir/trials.lst | sed 's@-enroll@@g' | sed 's@test/@@g' | sed 's@.wav@@g' | \
+ awk '{if ($3 == "1")
+ {print $1,$2,"target"}
+ else
+ {print $1,$2,"nontarget"}
+ }'> $this_out_dir/trials.lst
+
diff --git a/egs/cnceleb/v1/path.sh b/egs/cnceleb/v1/path.sh
new file mode 100755
index 00000000000..e50f57c5271
--- /dev/null
+++ b/egs/cnceleb/v1/path.sh
@@ -0,0 +1,5 @@
+export KALDI_ROOT=`pwd`/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/cnceleb/v1/run.sh b/egs/cnceleb/v1/run.sh
new file mode 100755
index 00000000000..0afeddb8ffd
--- /dev/null
+++ b/egs/cnceleb/v1/run.sh
@@ -0,0 +1,133 @@
+#!/usr/bin/env bash
+# Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
+# 2017 Johns Hopkins University (Author: Daniel Povey)
+# 2017-2018 David Snyder
+# 2018 Ewald Enzinger
+# 2019 Tsinghua University (Author: Jiawen Kang and Lantian Li)
+# Apache 2.0.
+#
+# This is an i-vector-based recipe for CN-Celeb database.
+# See ../README.txt for more info on data required. The recipe uses
+# CN-Celeb/dev for training the UBM, T matrix and PLDA, and CN-Celeb/eval
+# for evaluation. The results are reported in terms of EER and minDCF,
+# and are inline in the comments below.
+
+. ./cmd.sh
+. ./path.sh
+set -e
+mfccdir=`pwd`/mfcc
+vaddir=`pwd`/mfcc
+
+cnceleb_root=/export/corpora/CN-Celeb
+eval_trails_core=data/eval_test/trials/trials.lst
+
+stage=0
+
+if [ $stage -le 0 ]; then
+ # Prepare the CN-Celeb dataset. The script is used to prepare the development
+ # dataset and evaluation dataset.
+ local/make_cnceleb.sh $cnceleb_root data
+fi
+
+if [ $stage -le 1 ]; then
+ # Make MFCCs and compute the energy-based VAD for each dataset
+ for name in train eval_enroll eval_test; do
+ steps/make_mfcc.sh --write-utt2num-frames true --mfcc-config conf/mfcc.conf --nj 20 --cmd "$train_cmd" \
+ data/${name} exp/make_mfcc $mfccdir
+ utils/fix_data_dir.sh data/${name}
+ sid/compute_vad_decision.sh --nj 20 --cmd "$train_cmd" \
+ data/${name} exp/make_vad $vaddir
+ utils/fix_data_dir.sh data/${name}
+ done
+fi
+
+if [ $stage -le 2 ]; then
+ # Train the UBM
+ sid/train_diag_ubm.sh --cmd "$train_cmd --mem 4G" \
+ --nj 20 --num-threads 8 \
+ data/train 2048 \
+ exp/diag_ubm
+
+ sid/train_full_ubm.sh --cmd "$train_cmd --mem 16G" \
+ --nj 20 --remove-low-count-gaussians false \
+ data/train \
+ exp/diag_ubm exp/full_ubm
+fi
+
+if [ $stage -le 3 ]; then
+ # Train the i-vector extractor.
+ sid/train_ivector_extractor.sh --nj 20 --cmd "$train_cmd --mem 16G" \
+ --ivector-dim 400 --num-iters 5 \
+ exp/full_ubm/final.ubm data/train \
+ exp/extractor
+fi
+
+if [ $stage -le 4 ]; then
+ # Note that there are over one-third of the utterances less than 2 seconds in our training set,
+ # and these short utterances are harmful for PLDA training. Therefore, to improve performance
+ # of PLDA modeling and inference, we will combine the short utterances longer than 5 seconds.
+ utils/data/combine_short_segments.sh --speaker-only true \
+ data/train 5 data/train_comb
+ # Compute the energy-based VAD for train_comb
+ sid/compute_vad_decision.sh --nj 20 --cmd "$train_cmd" \
+ data/train_comb exp/make_vad $vaddir
+ utils/fix_data_dir.sh data/train_comb
+fi
+
+if [ $stage -le 5 ]; then
+ # These i-vectors will be used for mean-subtraction, LDA, and PLDA training.
+ sid/extract_ivectors.sh --cmd "$train_cmd --mem 4G" --nj 20 \
+ exp/extractor data/train_comb \
+ exp/ivectors_train_comb
+
+ # Extract i-vector for eval sets.
+ for name in eval_enroll eval_test; do
+ sid/extract_ivectors.sh --cmd "$train_cmd --mem 4G" --nj 10 \
+ exp/extractor data/$name \
+ exp/ivectors_$name
+ done
+fi
+
+if [ $stage -le 6 ]; then
+ # Compute the mean vector for centering the evaluation i-vectors.
+ $train_cmd exp/ivectors_train_comb/log/compute_mean.log \
+ ivector-mean scp:exp/ivectors_train_comb/ivector.scp \
+ exp/ivectors_train_comb/mean.vec || exit 1;
+
+ # This script uses LDA to decrease the dimensionality prior to PLDA.
+ lda_dim=150
+ $train_cmd exp/ivectors_train_comb/log/lda.log \
+ ivector-compute-lda --total-covariance-factor=0.0 --dim=$lda_dim \
+ "ark:ivector-subtract-global-mean scp:exp/ivectors_train_comb/ivector.scp ark:- |" \
+ ark:data/train_comb/utt2spk exp/ivectors_train_comb/transform.mat || exit 1;
+
+ # Train the PLDA model.
+ $train_cmd exp/ivectors_train_comb/log/plda.log \
+ ivector-compute-plda ark:data/train_comb/spk2utt \
+ "ark:ivector-subtract-global-mean scp:exp/ivectors_train_comb/ivector.scp ark:- | transform-vec exp/ivectors_train_comb/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
+ exp/ivectors_train_comb/plda || exit 1;
+
+fi
+
+if [ $stage -le 7 ]; then
+ # Compute PLDA scores for CN-Celeb eval core trials
+ $train_cmd exp/scores/log/cnceleb_eval_scoring.log \
+ ivector-plda-scoring --normalize-length=true \
+ --num-utts=ark:exp/ivectors_eval_enroll/num_utts.ark \
+ "ivector-copy-plda --smoothing=0.0 exp/ivectors_train_comb/plda - |" \
+ "ark:ivector-mean ark:data/eval_enroll/spk2utt scp:exp/ivectors_eval_enroll/ivector.scp ark:- | ivector-subtract-global-mean exp/ivectors_train_comb/mean.vec ark:- ark:- | transform-vec exp/ivectors_train_comb/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
+ "ark:ivector-subtract-global-mean exp/ivectors_train_comb/mean.vec scp:exp/ivectors_eval_test/ivector.scp ark:- | transform-vec exp/ivectors_train_comb/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
+ "cat '$eval_trails_core' | cut -d\ --fields=1,2 |" exp/scores/cnceleb_eval_scores || exit 1;
+
+ # CN-Celeb Eval Core:
+ # EER: 13.91%
+ # minDCF(p-target=0.01): 0.6530
+ # minDCF(p-target=0.001): 0.7521
+ echo -e "\nCN-Celeb Eval Core:";
+ eer=$(paste $eval_trails_core exp/scores/cnceleb_eval_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
+ mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/cnceleb_eval_scores $eval_trails_core 2> /dev/null`
+ mindcf2=`sid/compute_min_dcf.py --p-target 0.001 exp/scores/cnceleb_eval_scores $eval_trails_core 2> /dev/null`
+ echo "EER: $eer%"
+ echo "minDCF(p-target=0.01): $mindcf1"
+ echo "minDCF(p-target=0.001): $mindcf2"
+fi
diff --git a/egs/cnceleb/v1/sid b/egs/cnceleb/v1/sid
new file mode 120000
index 00000000000..893a12f30c9
--- /dev/null
+++ b/egs/cnceleb/v1/sid
@@ -0,0 +1 @@
+../../sre08/v1/sid
\ No newline at end of file
diff --git a/egs/cnceleb/v1/steps b/egs/cnceleb/v1/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/cnceleb/v1/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/cnceleb/v1/utils b/egs/cnceleb/v1/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/cnceleb/v1/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/commonvoice/s5/local/chain/compare_wer.sh b/egs/commonvoice/s5/local/chain/compare_wer.sh
index 133b6b5d250..217ec057a1a 100755
--- a/egs/commonvoice/s5/local/chain/compare_wer.sh
+++ b/egs/commonvoice/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copied from egs/mini_librispeech/s5/local/chain/compare_wer.sh (commit 87d95c5efff7da3b6f04e719a96de4204a367f8b)
diff --git a/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh
index d4acd0fed4b..68cb129d9ed 100755
--- a/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Adapted from egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh
diff --git a/egs/commonvoice/s5/local/download_and_untar.sh b/egs/commonvoice/s5/local/download_and_untar.sh
index 5590b36486d..23650b1d475 100755
--- a/egs/commonvoice/s5/local/download_and_untar.sh
+++ b/egs/commonvoice/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Luminar Technologies, Inc. (author: Daniel Galvez)
diff --git a/egs/commonvoice/s5/local/nnet3/compare_wer.sh b/egs/commonvoice/s5/local/nnet3/compare_wer.sh
index 17cbe1bbcef..5e3c0b0f9bd 100755
--- a/egs/commonvoice/s5/local/nnet3/compare_wer.sh
+++ b/egs/commonvoice/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copied from egs/mini_librispeech/s5/local/nnet3/compare_wer.sh (commit 87d95c5efff7da3b6f04e719a96de4204a367f8b)
diff --git a/egs/commonvoice/s5/local/nnet3/run_ivector_common.sh b/egs/commonvoice/s5/local/nnet3/run_ivector_common.sh
index 5560120677e..ab43a277218 100755
--- a/egs/commonvoice/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/commonvoice/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Adapted from egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh (commit 92c99ee51caeba4be7c5ab39ea7c1d6100f3d67b)
diff --git a/egs/commonvoice/s5/local/prepare_dict.sh b/egs/commonvoice/s5/local/prepare_dict.sh
index cdfffe42080..670dc972d26 100755
--- a/egs/commonvoice/s5/local/prepare_dict.sh
+++ b/egs/commonvoice/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/commonvoice/s5/local/prepare_lm.sh b/egs/commonvoice/s5/local/prepare_lm.sh
index 8bd689ac73e..86d9df1a996 100755
--- a/egs/commonvoice/s5/local/prepare_lm.sh
+++ b/egs/commonvoice/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/commonvoice/s5/run.sh b/egs/commonvoice/s5/run.sh
index 3e0e46c89f1..6065857ade2 100755
--- a/egs/commonvoice/s5/run.sh
+++ b/egs/commonvoice/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Recipe for Mozilla Common Voice corpus v1
#
diff --git a/egs/csj/s5/local/chain/compare_wer.sh b/egs/csj/s5/local/chain/compare_wer.sh
index d7017a51a71..3076d394f41 100644
--- a/egs/csj/s5/local/chain/compare_wer.sh
+++ b/egs/csj/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn{1a,1b}
diff --git a/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh
index 75ceb80e3e0..b622b9eb262 100755
--- a/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a basic TDNN experiment.(As the speed_perturbation is done by default,
# the _sp suffix on the directory name is removed.)
diff --git a/egs/csj/s5/local/csj_data_prep.sh b/egs/csj/s5/local/csj_data_prep.sh
index 69e2865e316..b3fe966a4e2 100755
--- a/egs/csj/s5/local/csj_data_prep.sh
+++ b/egs/csj/s5/local/csj_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
# 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
diff --git a/egs/csj/s5/local/csj_eval_data_prep.sh b/egs/csj/s5/local/csj_eval_data_prep.sh
index c452ee9f239..18d2fa5422c 100755
--- a/egs/csj/s5/local/csj_eval_data_prep.sh
+++ b/egs/csj/s5/local/csj_eval_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
# 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
diff --git a/egs/csj/s5/local/csj_prepare_dict.sh b/egs/csj/s5/local/csj_prepare_dict.sh
index 5620b7d99af..a2ff0c2dcc7 100755
--- a/egs/csj/s5/local/csj_prepare_dict.sh
+++ b/egs/csj/s5/local/csj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Making dictionary using CSJ data with morpheme analysis.
# from the one in Kaldi s5 recipe in that it uses lower-case --Arnab (Jan 2013)
diff --git a/egs/csj/s5/local/csj_run_rnnlm.sh b/egs/csj/s5/local/csj_run_rnnlm.sh
index b3cb79478b1..53d50acfcc1 100755
--- a/egs/csj/s5/local/csj_run_rnnlm.sh
+++ b/egs/csj/s5/local/csj_run_rnnlm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Tokyo Institute of Technology (Authors: Tomohiro Tanaka, Takafumi Moriya and Takahiro Shinozaki)
# 2016 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
diff --git a/egs/csj/s5/local/nnet/run_dnn.sh b/egs/csj/s5/local/nnet/run_dnn.sh
index 54b1da88fd3..d615b55674f 100755
--- a/egs/csj/s5/local/nnet/run_dnn.sh
+++ b/egs/csj/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh b/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh
index 297aed1f486..a405f13095b 100755
--- a/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh
+++ b/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 2016 Modified by Takafumi Moriya at Tokyo Institute of Technology
# for Japanese speech recognition using CSJ.
diff --git a/egs/csj/s5/local/nnet/run_lstm.sh b/egs/csj/s5/local/nnet/run_lstm.sh
index dc0f40dec24..17af4a1ed85 100755
--- a/egs/csj/s5/local/nnet/run_lstm.sh
+++ b/egs/csj/s5/local/nnet/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 2016 Modified by Takafumi Moriya at Tokyo Institute of Technology
# for Japanese speech recognition using CSJ.
diff --git a/egs/csj/s5/local/nnet3/run_ivector_common.sh b/egs/csj/s5/local/nnet3/run_ivector_common.sh
index 9c6b02b6e59..8ea140869fd 100755
--- a/egs/csj/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/csj/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/csj/s5/local/nnet3/run_tdnn.sh b/egs/csj/s5/local/nnet3/run_tdnn.sh
index e656b825517..f14f216d51b 100755
--- a/egs/csj/s5/local/nnet3/run_tdnn.sh
+++ b/egs/csj/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is modified from swbd/s5c/local/nnet3/run_tdnn.sh
# Tomohiro Tanaka 15/05/2016
diff --git a/egs/csj/s5/local/run_mmi.sh b/egs/csj/s5/local/run_mmi.sh
index 1f844f697c3..7f44b89633a 100644
--- a/egs/csj/s5/local/run_mmi.sh
+++ b/egs/csj/s5/local/run_mmi.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. cmd.sh
diff --git a/egs/csj/s5/local/run_sgmm2.sh b/egs/csj/s5/local/run_sgmm2.sh
index c66b43c4f7f..66477a9c7b1 100755
--- a/egs/csj/s5/local/run_sgmm2.sh
+++ b/egs/csj/s5/local/run_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/csj/s5/local/score_csj.sh b/egs/csj/s5/local/score_csj.sh
index 91cc944e0b5..92e28adf8b3 100755
--- a/egs/csj/s5/local/score_csj.sh
+++ b/egs/csj/s5/local/score_csj.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# Modified by Takafumi Moriya for Japanese speech recognition using CSJ.
diff --git a/egs/csj/s5/run.sh b/egs/csj/s5/run.sh
index aaf2e51313e..dde2197666b 100755
--- a/egs/csj/s5/run.sh
+++ b/egs/csj/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Tokyo Institute of Technology
# (Authors: Takafumi Moriya, Tomohiro Tanaka and Takahiro Shinozaki)
diff --git a/egs/dihard_2018/v1/local/make_dihard_2018_dev.sh b/egs/dihard_2018/v1/local/make_dihard_2018_dev.sh
index cc48e2e792a..161bcd4d5f2 100755
--- a/egs/dihard_2018/v1/local/make_dihard_2018_dev.sh
+++ b/egs/dihard_2018/v1/local/make_dihard_2018_dev.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Zili Huang
# Apache 2.0.
#
diff --git a/egs/dihard_2018/v1/local/make_dihard_2018_eval.sh b/egs/dihard_2018/v1/local/make_dihard_2018_eval.sh
index 0a461c635ec..86bb0c31bb7 100755
--- a/egs/dihard_2018/v1/local/make_dihard_2018_eval.sh
+++ b/egs/dihard_2018/v1/local/make_dihard_2018_eval.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Zili Huang
# Apache 2.0.
#
diff --git a/egs/dihard_2018/v1/local/prepare_feats.sh b/egs/dihard_2018/v1/local/prepare_feats.sh
index 9fa70a2d91e..d9e0c2b49c9 100755
--- a/egs/dihard_2018/v1/local/prepare_feats.sh
+++ b/egs/dihard_2018/v1/local/prepare_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/dihard_2018/v1/run.sh b/egs/dihard_2018/v1/run.sh
index eb23ac500cd..be7cc9a6df6 100755
--- a/egs/dihard_2018/v1/run.sh
+++ b/egs/dihard_2018/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
# 2017-2018 David Snyder
diff --git a/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats.sh b/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats.sh
index 4ad2c42d8b9..19d202bc0dd 100755
--- a/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats.sh
+++ b/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats_for_egs.sh
index 1d8ac6153e7..cf0519aded9 100755
--- a/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats_for_egs.sh
+++ b/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copied from egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh (commit 3ea534070fd2cccd2e4ee21772132230033022ce).
#
diff --git a/egs/dihard_2018/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/dihard_2018/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh
index 4ee472b1c71..7e39b8a2110 100755
--- a/egs/dihard_2018/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh
+++ b/egs/dihard_2018/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 David Snyder
# 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/dihard_2018/v2/run.sh b/egs/dihard_2018/v2/run.sh
index 6cd6630a838..542fc0930dd 100755
--- a/egs/dihard_2018/v2/run.sh
+++ b/egs/dihard_2018/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
# 2017-2018 David Snyder
diff --git a/egs/fame/s5/local/fame_data_prep.sh b/egs/fame/s5/local/fame_data_prep.sh
index 11c28c1d130..fb5ae71e4bf 100755
--- a/egs/fame/s5/local/fame_data_prep.sh
+++ b/egs/fame/s5/local/fame_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/s5/local/fame_dict_prep.sh b/egs/fame/s5/local/fame_dict_prep.sh
index 95b5d846e6a..2202b1adf91 100755
--- a/egs/fame/s5/local/fame_dict_prep.sh
+++ b/egs/fame/s5/local/fame_dict_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/s5/local/nnet/run_dnn.sh b/egs/fame/s5/local/nnet/run_dnn.sh
index ca1efa5e0ac..80c877dc50e 100755
--- a/egs/fame/s5/local/nnet/run_dnn.sh
+++ b/egs/fame/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/s5/local/nnet/run_dnn_fbank.sh b/egs/fame/s5/local/nnet/run_dnn_fbank.sh
index a81449ffbcf..9d068c71455 100755
--- a/egs/fame/s5/local/nnet/run_dnn_fbank.sh
+++ b/egs/fame/s5/local/nnet/run_dnn_fbank.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/s5/local/wer_hyp_filter b/egs/fame/s5/local/wer_hyp_filter
index 372d1a9c73a..f1fb43d135d 100755
--- a/egs/fame/s5/local/wer_hyp_filter
+++ b/egs/fame/s5/local/wer_hyp_filter
@@ -1,2 +1,4 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s:::g
+'
diff --git a/egs/fame/s5/local/wer_output_filter b/egs/fame/s5/local/wer_output_filter
index 372d1a9c73a..f1fb43d135d 100755
--- a/egs/fame/s5/local/wer_output_filter
+++ b/egs/fame/s5/local/wer_output_filter
@@ -1,2 +1,4 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s:::g
+'
diff --git a/egs/fame/s5/local/wer_ref_filter b/egs/fame/s5/local/wer_ref_filter
index 372d1a9c73a..f1fb43d135d 100755
--- a/egs/fame/s5/local/wer_ref_filter
+++ b/egs/fame/s5/local/wer_ref_filter
@@ -1,2 +1,4 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s:::g
+'
diff --git a/egs/fame/s5/run.sh b/egs/fame/s5/run.sh
index de6fe46b7c4..a8ed7bbd917 100755
--- a/egs/fame/s5/run.sh
+++ b/egs/fame/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/fame/v1/local/dnn/run_nnet2_common.sh b/egs/fame/v1/local/dnn/run_nnet2_common.sh
index df5804d7d78..54223c76d71 100755
--- a/egs/fame/v1/local/dnn/run_nnet2_common.sh
+++ b/egs/fame/v1/local/dnn/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Make the features.
diff --git a/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh b/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh
index bba54c5583f..c9523f0e920 100755
--- a/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh
+++ b/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Radboud University (Author: Emre Yilmaz)
#
# This script is based on run_nnet2_multisplice.sh in
diff --git a/egs/fame/v1/local/dnn/train_dnn.sh b/egs/fame/v1/local/dnn/train_dnn.sh
index 7155f32d6a4..c211feb7173 100755
--- a/egs/fame/v1/local/dnn/train_dnn.sh
+++ b/egs/fame/v1/local/dnn/train_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Radboud University (Author: Emre Yilmaz)
. ./cmd.sh
diff --git a/egs/fame/v1/local/fame_data_prep.sh b/egs/fame/v1/local/fame_data_prep.sh
index bbe30976dd5..996b32fe82d 100755
--- a/egs/fame/v1/local/fame_data_prep.sh
+++ b/egs/fame/v1/local/fame_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2017 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/v1/local/fame_dict_prep.sh b/egs/fame/v1/local/fame_dict_prep.sh
index 122c34c837b..52c1f5d32f4 100755
--- a/egs/fame/v1/local/fame_dict_prep.sh
+++ b/egs/fame/v1/local/fame_dict_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/v1/local/plda_scoring.sh b/egs/fame/v1/local/plda_scoring.sh
index 63d4a4f0d4c..d3e9c0c0e9d 100755
--- a/egs/fame/v1/local/plda_scoring.sh
+++ b/egs/fame/v1/local/plda_scoring.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
diff --git a/egs/fame/v1/local/prepare_train.sh b/egs/fame/v1/local/prepare_train.sh
index 0a3979dd6a7..c1d91b85645 100755
--- a/egs/fame/v1/local/prepare_train.sh
+++ b/egs/fame/v1/local/prepare_train.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2017 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/v1/local/scoring_common.sh b/egs/fame/v1/local/scoring_common.sh
index 63950ae5711..0d9f74f6600 100755
--- a/egs/fame/v1/local/scoring_common.sh
+++ b/egs/fame/v1/local/scoring_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Copyright 2017 Emre Yilmaz (Adapted)
# Apache 2.0.
diff --git a/egs/fame/v1/run.sh b/egs/fame/v1/run.sh
index 34c425adcf7..8a1c6334e55 100755
--- a/egs/fame/v1/run.sh
+++ b/egs/fame/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2015 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/fame/v2/run.sh b/egs/fame/v2/run.sh
index 43fb5a275de..d3bb35a64f4 100755
--- a/egs/fame/v2/run.sh
+++ b/egs/fame/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 David Snyder
# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2015 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/farsdat/s5/local/farsdat_data_prep.sh b/egs/farsdat/s5/local/farsdat_data_prep.sh
index 0a086c535d2..9c114501537 100755
--- a/egs/farsdat/s5/local/farsdat_data_prep.sh
+++ b/egs/farsdat/s5/local/farsdat_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Univercity of Tehran (Author: Bagher BabaAli)
diff --git a/egs/farsdat/s5/local/farsdat_format_data.sh b/egs/farsdat/s5/local/farsdat_format_data.sh
index 8e565f11fd0..2415f163bf8 100644
--- a/egs/farsdat/s5/local/farsdat_format_data.sh
+++ b/egs/farsdat/s5/local/farsdat_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 (Author: Daniel Povey, Bagher BabaAli)
# Apache 2.0
diff --git a/egs/farsdat/s5/local/farsdat_norm_trans.sh b/egs/farsdat/s5/local/farsdat_norm_trans.sh
index 369843c95a6..7bb53f1aeed 100755
--- a/egs/farsdat/s5/local/farsdat_norm_trans.sh
+++ b/egs/farsdat/s5/local/farsdat_norm_trans.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Tehran (Author: Bagher BabaAli)
# Apache 2.0.
diff --git a/egs/farsdat/s5/local/farsdat_prepare_dict.sh b/egs/farsdat/s5/local/farsdat_prepare_dict.sh
index af03e4f44b3..6d2730b6937 100755
--- a/egs/farsdat/s5/local/farsdat_prepare_dict.sh
+++ b/egs/farsdat/s5/local/farsdat_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 (Authors: Daniel Povey, Bagher BabaAli)
diff --git a/egs/farsdat/s5/local/farsdat_prepare_lm.sh b/egs/farsdat/s5/local/farsdat_prepare_lm.sh
index c04f756d438..6476fa3b168 100755
--- a/egs/farsdat/s5/local/farsdat_prepare_lm.sh
+++ b/egs/farsdat/s5/local/farsdat_prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/farsdat/s5/local/nnet/run_dnn.sh b/egs/farsdat/s5/local/nnet/run_dnn.sh
index a02894a7322..19f49ad6bd2 100755
--- a/egs/farsdat/s5/local/nnet/run_dnn.sh
+++ b/egs/farsdat/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/farsdat/s5/local/score_sclite.sh b/egs/farsdat/s5/local/score_sclite.sh
index 6269f7c494b..90cb91ad33c 100755
--- a/egs/farsdat/s5/local/score_sclite.sh
+++ b/egs/farsdat/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/farsdat/s5/run.sh b/egs/farsdat/s5/run.sh
index 4c3d3c5882b..a08bd8af308 100755
--- a/egs/farsdat/s5/run.sh
+++ b/egs/farsdat/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 University of Tehran (Author: Bagher BabaAli)
diff --git a/egs/fisher_callhome_spanish/s5/local/callhome_data_prep.sh b/egs/fisher_callhome_spanish/s5/local/callhome_data_prep.sh
index f61b0fa9519..31202c5a406 100755
--- a/egs/fisher_callhome_spanish/s5/local/callhome_data_prep.sh
+++ b/egs/fisher_callhome_spanish/s5/local/callhome_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Gaurav Kumar. Apache 2.0
# The input is the Callhome Spanish Dataset. (*.sph files)
diff --git a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh
index 7f407552c2e..087f95c5392 100755
--- a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh
+++ b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1g is like 1f but upgrading to a "resnet-style TDNN-F model", i.e.
# with bypass resnet connections, and re-tuned.
diff --git a/egs/fisher_callhome_spanish/s5/local/fsp_create_test_lang.sh b/egs/fisher_callhome_spanish/s5/local/fsp_create_test_lang.sh
index fb765b57e69..195b9f25713 100755
--- a/egs/fisher_callhome_spanish/s5/local/fsp_create_test_lang.sh
+++ b/egs/fisher_callhome_spanish/s5/local/fsp_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Gaurav Kumar. Apache 2.0
#
diff --git a/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh b/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh
index 11d65da3e95..0bcd4abca8b 100755
--- a/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh
+++ b/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Gaurav Kumar. Apache 2.0
# The input is the Fisher Dataset which contains DISC1 and DISC2. (*.sph files)
diff --git a/egs/fisher_callhome_spanish/s5/local/fsp_train_lms.sh b/egs/fisher_callhome_spanish/s5/local/fsp_train_lms.sh
index cebf3b222ab..c839dd16255 100755
--- a/egs/fisher_callhome_spanish/s5/local/fsp_train_lms.sh
+++ b/egs/fisher_callhome_spanish/s5/local/fsp_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Gaurav Kumar. Apache 2.0
# To be run from one level above this directory
diff --git a/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh
index cc9de4d26c5..4204e94fad9 100755
--- a/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh b/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh
index 3713fe228d6..300d54b0945 100755
--- a/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh
+++ b/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/fisher_callhome_spanish/s5/local/run_sgmm2x.sh b/egs/fisher_callhome_spanish/s5/local/run_sgmm2x.sh
index 9148b1f1171..7b9d1cc7ab3 100755
--- a/egs/fisher_callhome_spanish/s5/local/run_sgmm2x.sh
+++ b/egs/fisher_callhome_spanish/s5/local/run_sgmm2x.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Gaurav Kumar. Apache 2.0
# This is as run_sgmm2.sh but excluding the "speaker-dependent weights",
diff --git a/egs/fisher_callhome_spanish/s5/local/subset_data_prep.sh b/egs/fisher_callhome_spanish/s5/local/subset_data_prep.sh
index 9f5855d56c4..433201609cb 100755
--- a/egs/fisher_callhome_spanish/s5/local/subset_data_prep.sh
+++ b/egs/fisher_callhome_spanish/s5/local/subset_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Gaurav Kumar. Apache 2.0
# The input is a subset of the dataset in use. (*.sph files)
diff --git a/egs/fisher_callhome_spanish/s5/local/wer_output_filter b/egs/fisher_callhome_spanish/s5/local/wer_output_filter
index 4fce42945b3..2c52ee3fd23 100755
--- a/egs/fisher_callhome_spanish/s5/local/wer_output_filter
+++ b/egs/fisher_callhome_spanish/s5/local/wer_output_filter
@@ -1,5 +1,7 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s:\[laughter\]::g
s:\[noise\]::g
s:\[oov\]::g
s:::g
+'
diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh
index 6e2752a7b68..c8e6adab40f 100755
--- a/egs/fisher_callhome_spanish/s5/run.sh
+++ b/egs/fisher_callhome_spanish/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Nagendra Goel, Saikiran Valluri Apache 2.0
# Copyright 2014 Gaurav Kumar. Apache 2.0
diff --git a/egs/fisher_english/s5/local/chain/compare_wer_general.sh b/egs/fisher_english/s5/local/chain/compare_wer_general.sh
index 2f724c8ff81..4d80fb90687 100755
--- a/egs/fisher_english/s5/local/chain/compare_wer_general.sh
+++ b/egs/fisher_english/s5/local/chain/compare_wer_general.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_{c,d}_sp
diff --git a/egs/fisher_english/s5/local/chain/run_tdnn.sh b/egs/fisher_english/s5/local/chain/run_tdnn.sh
index 1fd0f1fdf3a..1df1a4555c5 100755
--- a/egs/fisher_english/s5/local/chain/run_tdnn.sh
+++ b/egs/fisher_english/s5/local/chain/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Based on run_tdnn_7b.sh in the fisher swbd recipe
diff --git a/egs/fisher_english/s5/local/fisher_create_test_lang.sh b/egs/fisher_english/s5/local/fisher_create_test_lang.sh
index ac3e16c9c78..3202589a16a 100755
--- a/egs/fisher_english/s5/local/fisher_create_test_lang.sh
+++ b/egs/fisher_english/s5/local/fisher_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script formats ARPA LM into G.fst.
diff --git a/egs/fisher_english/s5/local/fisher_data_prep.sh b/egs/fisher_english/s5/local/fisher_data_prep.sh
index f3ad3c3f5bd..900ee385768 100755
--- a/egs/fisher_english/s5/local/fisher_data_prep.sh
+++ b/egs/fisher_english/s5/local/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/fisher_english/s5/local/fisher_prepare_dict.sh b/egs/fisher_english/s5/local/fisher_prepare_dict.sh
index f52ec61823a..bd6b6f45b67 100755
--- a/egs/fisher_english/s5/local/fisher_prepare_dict.sh
+++ b/egs/fisher_english/s5/local/fisher_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/fisher_english/s5/local/fisher_train_lms.sh b/egs/fisher_english/s5/local/fisher_train_lms.sh
index 881d3ce9466..d1c8b1e9714 100755
--- a/egs/fisher_english/s5/local/fisher_train_lms.sh
+++ b/egs/fisher_english/s5/local/fisher_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/fisher_english/s5/local/fisher_train_lms_pocolm.sh b/egs/fisher_english/s5/local/fisher_train_lms_pocolm.sh
index 906703953a1..570339104f7 100755
--- a/egs/fisher_english/s5/local/fisher_train_lms_pocolm.sh
+++ b/egs/fisher_english/s5/local/fisher_train_lms_pocolm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh
index 210d0f5646f..8e4218f34ff 100755
--- a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh
+++ b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this (local/nnet2/run_6c_gpu.sh) trains a p-norm neural network on top of
diff --git a/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh
index b203f9638b4..d8c3945a5d9 100755
--- a/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2017 Vimal Manohar
diff --git a/egs/fisher_english/s5/local/online/run_nnet2.sh b/egs/fisher_english/s5/local/online/run_nnet2.sh
index 2204a16e363..fbd0953f42c 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/fisher_english/s5/local/online/run_nnet2_b.sh b/egs/fisher_english/s5/local/online/run_nnet2_b.sh
index 512f35d8043..2664fe8a718 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2_b.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2_b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script run_nnet2_b.sh is as run_nnet2.sh but it trains a larger network,
# with 5 instead of 4 hidden layers and p-norm (input,output) dims of
diff --git a/egs/fisher_english/s5/local/online/run_nnet2_common.sh b/egs/fisher_english/s5/local/online/run_nnet2_common.sh
index 5a23e6b32da..af3f9f25695 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2_common.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Make the features, build the iVector extractor
diff --git a/egs/fisher_english/s5/local/online/run_nnet2_discriminative.sh b/egs/fisher_english/s5/local/online/run_nnet2_discriminative.sh
index 8c85a989fdd..25d618d2674 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2_discriminative.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is to be run after run_nnet2.sh
diff --git a/egs/fisher_english/s5/local/online/run_nnet2_multisplice.sh b/egs/fisher_english/s5/local/online/run_nnet2_multisplice.sh
index b5361f2f8d8..c8ad9d8d1de 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2_multisplice.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2_multisplice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/fisher_english/s5/local/run_data_cleaning.sh b/egs/fisher_english/s5/local/run_data_cleaning.sh
index 68b752ad577..7154da73e27 100755
--- a/egs/fisher_english/s5/local/run_data_cleaning.sh
+++ b/egs/fisher_english/s5/local/run_data_cleaning.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script shows how you can do data-cleaning, and exclude data that has a
diff --git a/egs/fisher_english/s5/local/run_nnet2.sh b/egs/fisher_english/s5/local/run_nnet2.sh
index 359b6c5afdf..6f971129878 100755
--- a/egs/fisher_english/s5/local/run_nnet2.sh
+++ b/egs/fisher_english/s5/local/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This shows what you can potentially run; you'd probably want to pick and choose.
diff --git a/egs/fisher_english/s5/local/run_unk_model.sh b/egs/fisher_english/s5/local/run_unk_model.sh
index 1fe658bda79..1894acf7bdb 100755
--- a/egs/fisher_english/s5/local/run_unk_model.sh
+++ b/egs/fisher_english/s5/local/run_unk_model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh
index 07636a8b3c8..24c64d84ba9 100644
--- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh
+++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
index b1c133942ef..d934db2929d 100755
--- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
+++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh
index 04244014502..66ff7363a0e 100755
--- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh
+++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/local/semisup/run_100k.sh b/egs/fisher_english/s5/local/semisup/run_100k.sh
index 7657e94b7f2..0d93820d865 100644
--- a/egs/fisher_english/s5/local/semisup/run_100k.sh
+++ b/egs/fisher_english/s5/local/semisup/run_100k.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/local/semisup/run_50k.sh b/egs/fisher_english/s5/local/semisup/run_50k.sh
index c2a5c0db7e7..27fb54d7659 100644
--- a/egs/fisher_english/s5/local/semisup/run_50k.sh
+++ b/egs/fisher_english/s5/local/semisup/run_50k.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/run.sh b/egs/fisher_english/s5/run.sh
index 67c0d5ce638..256cc7f850f 100755
--- a/egs/fisher_english/s5/run.sh
+++ b/egs/fisher_english/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# It's best to run the commands in this one by one.
diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh
index 66f87c8da8f..ba116ca472e 100755
--- a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# based on run_tdnn_6h.sh
diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh
index c12f604f26b..87a77127a21 100755
--- a/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh
index 543f753bd4e..eea6efea2b9 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh
index efcd1eced4a..8d31a580ea1 100644
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh
index e4a555abfdd..ad6139e909c 100644
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh
index 5650cedca28..52d8e2b7ae1 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh
index 5beb2e74a9a..cab547fc1e4 100644
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Nagendra Kumar Goel,
# Saikiran Valluri, Govivace.Inc - Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh
index f3cc869e6de..37ab5ba5e4c 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh
index 059a81e15fc..73344f74995 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh
index d86b699d6f6..613c4fde163 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/eval2000_data_prep.sh b/egs/fisher_swbd/s5/local/eval2000_data_prep.sh
index 533b5ecf46e..17e069964ff 100755
--- a/egs/fisher_swbd/s5/local/eval2000_data_prep.sh
+++ b/egs/fisher_swbd/s5/local/eval2000_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Hub-5 Eval 2000 data preparation
# Author: Arnab Ghoshal (Jan 2013)
diff --git a/egs/fisher_swbd/s5/local/fisher_create_test_lang.sh b/egs/fisher_swbd/s5/local/fisher_create_test_lang.sh
index f0926d2ceab..250257e071d 100755
--- a/egs/fisher_swbd/s5/local/fisher_create_test_lang.sh
+++ b/egs/fisher_swbd/s5/local/fisher_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/fisher_swbd/s5/local/fisher_create_test_lang_fsh.sh b/egs/fisher_swbd/s5/local/fisher_create_test_lang_fsh.sh
index 44291ddace9..e1c174c135f 100755
--- a/egs/fisher_swbd/s5/local/fisher_create_test_lang_fsh.sh
+++ b/egs/fisher_swbd/s5/local/fisher_create_test_lang_fsh.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/fisher_swbd/s5/local/fisher_data_prep.sh b/egs/fisher_swbd/s5/local/fisher_data_prep.sh
index 186f7d7e122..d8cd7bc1ba3 100755
--- a/egs/fisher_swbd/s5/local/fisher_data_prep.sh
+++ b/egs/fisher_swbd/s5/local/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh
index ddc70295e40..3b7148d46ce 100755
--- a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh
+++ b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh
index bae19603285..90786b78209 100755
--- a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh
+++ b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/fisher_swbd/s5/local/fisher_train_lms.sh b/egs/fisher_swbd/s5/local/fisher_train_lms.sh
index 7df4353338e..3cf89d4c48e 100755
--- a/egs/fisher_swbd/s5/local/fisher_train_lms.sh
+++ b/egs/fisher_swbd/s5/local/fisher_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/fisher_swbd/s5/local/fisher_train_lms_fsh.sh b/egs/fisher_swbd/s5/local/fisher_train_lms_fsh.sh
index c9df2b72f0d..46418100f07 100755
--- a/egs/fisher_swbd/s5/local/fisher_train_lms_fsh.sh
+++ b/egs/fisher_swbd/s5/local/fisher_train_lms_fsh.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
@@ -31,7 +31,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz ||
- wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+ wget -c http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
diff --git a/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
index 01c988709f1..b711d2e9304 100755
--- a/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
set -e
diff --git a/egs/fisher_swbd/s5/local/online/run_nnet2_common.sh b/egs/fisher_swbd/s5/local/online/run_nnet2_common.sh
index ad2a54bd8bf..e7cff4dd606 100755
--- a/egs/fisher_swbd/s5/local/online/run_nnet2_common.sh
+++ b/egs/fisher_swbd/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Make the features, build the iVector extractor
diff --git a/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh b/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh
index 91dc19a1e71..ad1417daf19 100755
--- a/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh
+++ b/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/fisher_swbd/s5/local/rt03_data_prep.sh b/egs/fisher_swbd/s5/local/rt03_data_prep.sh
index f537b88b609..15e03f70498 100755
--- a/egs/fisher_swbd/s5/local/rt03_data_prep.sh
+++ b/egs/fisher_swbd/s5/local/rt03_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# RT-03 data preparation (conversational telephone speech part only)
# Adapted from Arnab Ghoshal's script for Hub-5 Eval 2000 by Peng Qi
diff --git a/egs/fisher_swbd/s5/local/score.sh b/egs/fisher_swbd/s5/local/score.sh
index 189d49119ab..ba8f43d7efd 100755
--- a/egs/fisher_swbd/s5/local/score.sh
+++ b/egs/fisher_swbd/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
orig_args=
diff --git a/egs/fisher_swbd/s5/local/score_basic.sh b/egs/fisher_swbd/s5/local/score_basic.sh
index 6d3ac65c383..18a939d0455 100755
--- a/egs/fisher_swbd/s5/local/score_basic.sh
+++ b/egs/fisher_swbd/s5/local/score_basic.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/fisher_swbd/s5/local/score_sclite.sh b/egs/fisher_swbd/s5/local/score_sclite.sh
index c17b3c69c9b..18ad3853255 100755
--- a/egs/fisher_swbd/s5/local/score_sclite.sh
+++ b/egs/fisher_swbd/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/fisher_swbd/s5/local/score_sclite_conf.sh b/egs/fisher_swbd/s5/local/score_sclite_conf.sh
index bb354fd50cf..d6d052ee763 100755
--- a/egs/fisher_swbd/s5/local/score_sclite_conf.sh
+++ b/egs/fisher_swbd/s5/local/score_sclite_conf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/fisher_swbd/s5/local/swbd1_data_download.sh b/egs/fisher_swbd/s5/local/swbd1_data_download.sh
index 04904945472..7f5ea2e13cb 100755
--- a/egs/fisher_swbd/s5/local/swbd1_data_download.sh
+++ b/egs/fisher_swbd/s5/local/swbd1_data_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Switchboard-1 training data preparation customized for Edinburgh
# Author: Arnab Ghoshal (Jan 2013)
@@ -36,7 +36,7 @@ if [ ! -d $SWBD_DIR/transcriptions/swb_ms98_transcriptions ]; then
if [ ! -d swb_ms98_transcriptions ]; then
echo " *** Downloading trascriptions and dictionary ***"
wget http://www.openslr.org/resources/5/switchboard_word_alignments.tar.gz ||
- wget http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz
+ wget -c http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz
tar -xf switchboard_word_alignments.tar.gz
fi
)
diff --git a/egs/fisher_swbd/s5/local/swbd1_data_prep.sh b/egs/fisher_swbd/s5/local/swbd1_data_prep.sh
index ce5d580eb5a..ee5bfc204aa 100755
--- a/egs/fisher_swbd/s5/local/swbd1_data_prep.sh
+++ b/egs/fisher_swbd/s5/local/swbd1_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Switchboard-1 training data preparation customized for Edinburgh
# Author: Arnab Ghoshal (Jan 2013)
diff --git a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh
index fc951d14fa0..da978a21c87 100755
--- a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh
+++ b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Formatting the Mississippi State dictionary for use in Edinburgh. Differs
# from the one in Kaldi s5 recipe in that it uses lower-case --Arnab (Jan 2013)
diff --git a/egs/fisher_swbd/s5/run.sh b/egs/fisher_swbd/s5/run.sh
index fcb0ac8ebb6..09da3ab7c91 100755
--- a/egs/fisher_swbd/s5/run.sh
+++ b/egs/fisher_swbd/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# It's best to run the commands in this one by one.
. ./cmd.sh
diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh
index 66c5ad3335f..f9ceb667553 100755
--- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7h.sh in swbd chain recipe.
diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh
index 1981bb0530d..d27f2884e10 100755
--- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script shows improvement arising from data cleaning.
diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh
index 6fa10344cfc..3e1f7ac7b15 100755
--- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh
+++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# CER:
# %WER 16.44 [ 35459 / 215718, 4216 ins, 11278 del, 19965 sub ] exp/chain/tdnn_1b_sp/decode_test/cer_10_0.0
diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh
index 1f4b7e12850..2f83db14ad1 100755
--- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# CER:
# 1a: %WER 16.83 [ 36305 / 215718, 4772 ins, 10810 del, 20723 sub ] exp/chain/tdnn_1a_sp/decode_test/cer_9_0.0
diff --git a/egs/formosa/s5/local/nnet3/run_ivector_common.sh b/egs/formosa/s5/local/nnet3/run_ivector_common.sh
index 723589ddd2e..11542e55760 100755
--- a/egs/formosa/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/formosa/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/formosa/s5/local/nnet3/run_tdnn.sh b/egs/formosa/s5/local/nnet3/run_tdnn.sh
index a41d990a9b2..9a0d6a6dbc1 100755
--- a/egs/formosa/s5/local/nnet3/run_tdnn.sh
+++ b/egs/formosa/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on swbd/s5c/local/nnet3/run_tdnn.sh
diff --git a/egs/formosa/s5/local/prepare_data.sh b/egs/formosa/s5/local/prepare_data.sh
index 68f342e1549..1da4eb16776 100755
--- a/egs/formosa/s5/local/prepare_data.sh
+++ b/egs/formosa/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2018 Yuan-Fu Liao, National Taipei University of Technology
@@ -26,15 +26,15 @@ if [ -z "$(command -v dos2unix 2>/dev/null)" ]; then
exit 1;
fi
-# have to remvoe previous files to avoid filtering speakers according to cmvn.scp and feats.scp
+# have to remove previous files to avoid filtering speakers according to cmvn.scp and feats.scp
rm -rf data/all data/train data/test data/eval data/local/train
mkdir -p data/all data/train data/test data/eval data/local/train
# make utt2spk, wav.scp and text
-find $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/all/utt2spk
-find $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/all/wav.scp
-find $train_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/all/text
+find -L $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/all/utt2spk
+find -L $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/all/wav.scp
+find -L $train_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/all/text
# fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp,
# duplicate entries and so on). Also, it regenerates the spk2utt from
@@ -51,9 +51,9 @@ echo "cp data/train/text data/local/train/text for language model training"
cat data/train/text | awk '{$1=""}1;' | awk '{$1=$1}1;' > data/local/train/text
# preparing EVAL set.
-find $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/eval/utt2spk
-find $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/eval/wav.scp
-find $eval_key_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/eval/text
+find -L $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/eval/utt2spk
+find -L $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/eval/wav.scp
+find -L $eval_key_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/eval/text
utils/fix_data_dir.sh data/eval
echo "Data preparation completed."
diff --git a/egs/formosa/s5/local/prepare_dict.sh b/egs/formosa/s5/local/prepare_dict.sh
index 4e580f5f6e8..82f93895919 100755
--- a/egs/formosa/s5/local/prepare_dict.sh
+++ b/egs/formosa/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2018 Yuan-Fu Liao, National Taipei University of Technology
diff --git a/egs/formosa/s5/local/prepare_lm.sh b/egs/formosa/s5/local/prepare_lm.sh
index 59fe1529658..b137bb2886f 100755
--- a/egs/formosa/s5/local/prepare_lm.sh
+++ b/egs/formosa/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/formosa/s5/local/run_cleanup_segmentation.sh b/egs/formosa/s5/local/run_cleanup_segmentation.sh
index b72cd89b4d1..1515411969c 100755
--- a/egs/formosa/s5/local/run_cleanup_segmentation.sh
+++ b/egs/formosa/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/formosa/s5/local/score.sh b/egs/formosa/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/formosa/s5/local/score.sh
+++ b/egs/formosa/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/formosa/s5/local/train_lms.sh b/egs/formosa/s5/local/train_lms.sh
index efc5b92c573..8c91b351867 100755
--- a/egs/formosa/s5/local/train_lms.sh
+++ b/egs/formosa/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/formosa/s5/run.sh b/egs/formosa/s5/run.sh
index a4d0f2dcd1d..7cf4dbac00e 100755
--- a/egs/formosa/s5/run.sh
+++ b/egs/formosa/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018, Yuan-Fu Liao, National Taipei University of Technology, yfliao@mail.ntut.edu.tw
#
diff --git a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
index a95b37ab14f..960a3ced161 100755
--- a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
+++ b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/gale_format_data.sh b/egs/gale_arabic/s5/local/gale_format_data.sh
index 053323dc194..8e86f75b085 100755
--- a/egs/gale_arabic/s5/local/gale_format_data.sh
+++ b/egs/gale_arabic/s5/local/gale_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/gale_prep_dict.sh b/egs/gale_arabic/s5/local/gale_prep_dict.sh
index f6fd83378d0..5aa3894272d 100755
--- a/egs/gale_arabic/s5/local/gale_prep_dict.sh
+++ b/egs/gale_arabic/s5/local/gale_prep_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/gale_train_lms.sh b/egs/gale_arabic/s5/local/gale_train_lms.sh
index 8f8e715390f..9f91749a0dd 100755
--- a/egs/gale_arabic/s5/local/gale_train_lms.sh
+++ b/egs/gale_arabic/s5/local/gale_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/gale_arabic/s5/local/nnet/run_lstm.sh b/egs/gale_arabic/s5/local/nnet/run_lstm.sh
index aeb2272976b..7969538b3c5 100755
--- a/egs/gale_arabic/s5/local/nnet/run_lstm.sh
+++ b/egs/gale_arabic/s5/local/nnet/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/online/run_nnet2.sh b/egs/gale_arabic/s5/local/online/run_nnet2.sh
index afc3166c9eb..4f24df40fd4 100644
--- a/egs/gale_arabic/s5/local/online/run_nnet2.sh
+++ b/egs/gale_arabic/s5/local/online/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar
# This is our online neural net build for Gale system
diff --git a/egs/gale_arabic/s5/local/run_sgmm.sh b/egs/gale_arabic/s5/local/run_sgmm.sh
index a5d32d18038..b0112004aa2 100755
--- a/egs/gale_arabic/s5/local/run_sgmm.sh
+++ b/egs/gale_arabic/s5/local/run_sgmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh
diff --git a/egs/gale_arabic/s5/local/score.sh b/egs/gale_arabic/s5/local/score.sh
index abd8149a672..332f038c575 100755
--- a/egs/gale_arabic/s5/local/score.sh
+++ b/egs/gale_arabic/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/score_combine.sh b/egs/gale_arabic/s5/local/score_combine.sh
index 65caab06ecc..c4d3c13886a 100755
--- a/egs/gale_arabic/s5/local/score_combine.sh
+++ b/egs/gale_arabic/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal
diff --git a/egs/gale_arabic/s5/local/score_mbr.sh b/egs/gale_arabic/s5/local/score_mbr.sh
index 04b84ccce5a..8c752368906 100755
--- a/egs/gale_arabic/s5/local/score_mbr.sh
+++ b/egs/gale_arabic/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/gale_arabic/s5/local/split_wer.sh b/egs/gale_arabic/s5/local/split_wer.sh
index 26d8a3c6023..dbcc9f03e73 100755
--- a/egs/gale_arabic/s5/local/split_wer.sh
+++ b/egs/gale_arabic/s5/local/split_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_arabic/s5/local/split_wer_per_corpus.sh b/egs/gale_arabic/s5/local/split_wer_per_corpus.sh
index 53716f809ac..556eb320a0d 100755
--- a/egs/gale_arabic/s5/local/split_wer_per_corpus.sh
+++ b/egs/gale_arabic/s5/local/split_wer_per_corpus.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_arabic/s5/run.sh b/egs/gale_arabic/s5/run.sh
index 5f20c14c414..7e672d67eb2 100755
--- a/egs/gale_arabic/s5/run.sh
+++ b/egs/gale_arabic/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/gale_arabic/s5b/local/chain/compare_wer.sh b/egs/gale_arabic/s5b/local/chain/compare_wer.sh
index 1a40523355a..ece324c279e 100755
--- a/egs/gale_arabic/s5b/local/chain/compare_wer.sh
+++ b/egs/gale_arabic/s5b/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/gale_arabic/s5b/local/chain/run_chain_common.sh b/egs/gale_arabic/s5b/local/chain/run_chain_common.sh
index da37e148441..710625cf489 100755
--- a/egs/gale_arabic/s5b/local/chain/run_chain_common.sh
+++ b/egs/gale_arabic/s5b/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh
index bf2e45c9914..346c3f39ccb 100755
--- a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# ./local/chain/compare_wer.sh exp/chain/tdnn_1a_sp
# System tdnn_1a_sp
diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
index deebafc95e4..259e660532d 100755
--- a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
index a03cc5b2fa3..3732e2e4518 100755
--- a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
+++ b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh b/egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh
index 7f7b8b3ba56..6e481f2ea7d 100755
--- a/egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh
+++ b/egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh
index 6619df668ef..c624d4e8535 100755
--- a/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5b/local/prepare_lm.sh b/egs/gale_arabic/s5b/local/prepare_lm.sh
index 6fdf35f471a..70ad8bc1b76 100755
--- a/egs/gale_arabic/s5b/local/prepare_lm.sh
+++ b/egs/gale_arabic/s5b/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/gale_arabic/s5b/local/score.sh b/egs/gale_arabic/s5b/local/score.sh
index 1d84815fc69..6168f38a929 100755
--- a/egs/gale_arabic/s5b/local/score.sh
+++ b/egs/gale_arabic/s5b/local/score.sh
@@ -1,5 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/gale_arabic/s5b/local/split_wer.sh b/egs/gale_arabic/s5b/local/split_wer.sh
index d83a0f79e8c..c4c323003a3 100755
--- a/egs/gale_arabic/s5b/local/split_wer.sh
+++ b/egs/gale_arabic/s5b/local/split_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_arabic/s5c/local/chain/compare_wer.sh b/egs/gale_arabic/s5c/local/chain/compare_wer.sh
index 1a40523355a..ece324c279e 100755
--- a/egs/gale_arabic/s5c/local/chain/compare_wer.sh
+++ b/egs/gale_arabic/s5c/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/gale_arabic/s5c/local/chain/run_chain_common.sh b/egs/gale_arabic/s5c/local/chain/run_chain_common.sh
index da37e148441..710625cf489 100755
--- a/egs/gale_arabic/s5c/local/chain/run_chain_common.sh
+++ b/egs/gale_arabic/s5c/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
diff --git a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh
index bf2e45c9914..346c3f39ccb 100755
--- a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# ./local/chain/compare_wer.sh exp/chain/tdnn_1a_sp
# System tdnn_1a_sp
diff --git a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh
index deebafc95e4..259e660532d 100755
--- a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5c/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5c/local/nnet3/run_ivector_common.sh
index a03cc5b2fa3..3732e2e4518 100755
--- a/egs/gale_arabic/s5c/local/nnet3/run_ivector_common.sh
+++ b/egs/gale_arabic/s5c/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/gale_arabic/s5c/local/nnet3/tuning/run_lstm_1a.sh b/egs/gale_arabic/s5c/local/nnet3/tuning/run_lstm_1a.sh
index 7f7b8b3ba56..6e481f2ea7d 100755
--- a/egs/gale_arabic/s5c/local/nnet3/tuning/run_lstm_1a.sh
+++ b/egs/gale_arabic/s5c/local/nnet3/tuning/run_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5c/local/nnet3/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5c/local/nnet3/tuning/run_tdnn_1a.sh
index 6619df668ef..c624d4e8535 100755
--- a/egs/gale_arabic/s5c/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/gale_arabic/s5c/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5c/local/prepare_lm.sh b/egs/gale_arabic/s5c/local/prepare_lm.sh
index 6fdf35f471a..70ad8bc1b76 100755
--- a/egs/gale_arabic/s5c/local/prepare_lm.sh
+++ b/egs/gale_arabic/s5c/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/gale_arabic/s5c/local/prepare_lm_subword.sh b/egs/gale_arabic/s5c/local/prepare_lm_subword.sh
index a5d5c1d1c94..7ba19c95277 100755
--- a/egs/gale_arabic/s5c/local/prepare_lm_subword.sh
+++ b/egs/gale_arabic/s5c/local/prepare_lm_subword.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/gale_arabic/s5c/local/score.sh b/egs/gale_arabic/s5c/local/score.sh
index 1d84815fc69..6168f38a929 100755
--- a/egs/gale_arabic/s5c/local/score.sh
+++ b/egs/gale_arabic/s5c/local/score.sh
@@ -1,5 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/gale_arabic/s5c/local/split_wer.sh b/egs/gale_arabic/s5c/local/split_wer.sh
index d83a0f79e8c..c4c323003a3 100755
--- a/egs/gale_arabic/s5c/local/split_wer.sh
+++ b/egs/gale_arabic/s5c/local/split_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_arabic/s5c/local/wer_output_filter b/egs/gale_arabic/s5c/local/wer_output_filter
index fcd40539e7f..0c8b7984f58 100755
--- a/egs/gale_arabic/s5c/local/wer_output_filter
+++ b/egs/gale_arabic/s5c/local/wer_output_filter
@@ -1,4 +1,6 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s/@@ //g
s///g
s///g
+'
diff --git a/egs/gale_arabic/s5d/RESULTS b/egs/gale_arabic/s5d/RESULTS
new file mode 100644
index 00000000000..464b98ae5eb
--- /dev/null
+++ b/egs/gale_arabic/s5d/RESULTS
@@ -0,0 +1,19 @@
+tri1
+%WER 40.91 [ 32272 / 78894, 2147 ins, 7478 del, 22647 sub ] exp/tri1/decode/wer_12_0.5
+tri2b
+%WER 36.68 [ 28936 / 78894, 2752 ins, 5682 del, 20502 sub ] exp/tri2b/decode/wer_13_0.0
+tri3b
+%WER 35.35 [ 27892 / 78894, 2587 ins, 7024 del, 18281 sub ] exp/tri3b/decode/wer_14_0.0
+
+chain for dev set
+%WER 16.60 [ 13094 / 78894, 1314 ins, 2992 del, 8788 sub ] exp/chain/tdnn_1a_sp/decode_dev/wer_9_0.0
+rnnlm-rescoring for dev set
+%WER 15.02 [ 11846 / 78894, 1248 ins, 2836 del, 7762 sub ] exp/chain/tdnn_1a_sp/decode_dev_rnnlm_1e_0.45/wer_9_0.0
+
+chain for test_p2 set
+%WER 14.95 [ 10416 / 69668, 1129 ins, 2593 del, 6694 sub ] exp/chain/tdnn_1a_sp/decode_test_p2/wer_9_0.0
+rnnlm-rescoring for test_p2 set
+%WER 13.51 [ 9413 / 69668, 1059 ins, 2517 del, 5837 sub ] exp/chain/tdnn_1a_sp/decode_test_p2_rnnlm_1e_0.45/wer_9_0.0
+
+rnnlm-rescoring for mt_eval set
+%WER 12.02 [ 10829 / 90112, 1483 ins, 2401 del, 6945 sub ] exp/chain/tdnn_1a_sp/decode_mt_all_rnnlm_1e_0.45/wer_9_0.0
diff --git a/egs/gale_arabic/s5d/cmd.sh b/egs/gale_arabic/s5d/cmd.sh
new file mode 100755
index 00000000000..0a2eda442d6
--- /dev/null
+++ b/egs/gale_arabic/s5d/cmd.sh
@@ -0,0 +1,15 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="retry.pl queue.pl"
+export decode_cmd="retry.pl queue.pl"
+export mkgraph_cmd="retry.pl queue.pl --mem 8G"
diff --git a/egs/gale_arabic/s5d/conf/decode.config b/egs/gale_arabic/s5d/conf/decode.config
new file mode 100644
index 00000000000..6f503eab35e
--- /dev/null
+++ b/egs/gale_arabic/s5d/conf/decode.config
@@ -0,0 +1 @@
+link decode_dnn.config
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/conf/mfcc.conf b/egs/gale_arabic/s5d/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/gale_arabic/s5d/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/gale_arabic/s5d/conf/mfcc_hires.conf b/egs/gale_arabic/s5d/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..c45f2b691a9
--- /dev/null
+++ b/egs/gale_arabic/s5d/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--sample-frequency=16000
+--num-mel-bins=40
+--num-ceps=40
+--low-freq=40 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/gale_arabic/s5d/conf/online_cmvn.conf b/egs/gale_arabic/s5d/conf/online_cmvn.conf
new file mode 100644
index 00000000000..cbdaf5f281c
--- /dev/null
+++ b/egs/gale_arabic/s5d/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh
diff --git a/egs/gale_arabic/s5d/local/add_to_datadir.py b/egs/gale_arabic/s5d/local/add_to_datadir.py
new file mode 100755
index 00000000000..b41ab42f7c4
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/add_to_datadir.py
@@ -0,0 +1,54 @@
+#!/usr/bin/python
+
+# This script appends utterances dumped out from XML to a Kaldi datadir
+
+import sys, re
+from xml.sax.saxutils import unescape
+
+basename=sys.argv[1]
+outdir = sys.argv[2]
+
+if len(sys.argv) > 3:
+ mer_thresh=float(sys.argv[3])
+else:
+ mer_thresh = None
+
+# open the output files in append mode
+#segments_file = open(outdir + '/segments', 'a')
+#utt2spk_file = open(outdir + '/utt2spk', 'a')
+#text_file = open(outdir + '/text', 'a')
+mgb2_file = open(outdir + '/mgb2', 'a')
+
+for line in sys.stdin:
+
+ m = re.match(r'\w+speaker(\d+)\w+\s+(.*)', line)
+ #print line
+
+ if m:
+
+ spk = int(m.group(1))
+
+ t = m.group(2).split()
+ start = float(t[0])
+ end = float(t[1])
+ mer = float(t[2])
+
+ s = [unescape(w) for w in t[3:]]
+ words = ' '.join(s)
+
+ segId = '%s_spk-%04d_seg-%07d:%07d' % (basename, spk, start*100, end*100)
+ spkId = '%s_spk-%04d' % (basename, spk)
+
+ # only add segments where the Matching Error Rate is below the prescribed threshold
+ if mer_thresh == None or mer <= mer_thresh:
+#print >> segments_file, '%s %s %.2f %.2f' % (segId, basename, start, end )
+#print >> text_file, '%s %s' % (segId, words)
+#print >> utt2spk_file, '%s %s' % (segId, spkId)
+ print >> mgb2_file, '%s %s %.3f %.3f %s' % (basename, segId, start, end, words)
+
+#segments_file.close()
+#utt2spk_file.close()
+#text_file.close()
+mgb2_file.close()
+
+
diff --git a/egs/gale_arabic/s5d/local/arabic_convert.py b/egs/gale_arabic/s5d/local/arabic_convert.py
new file mode 100755
index 00000000000..83d271bab40
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/arabic_convert.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+
+import sys
+
+def hex_to_decimal(utf8_string):
+ assert(len(utf8_string) == 3)
+ hex_dict = {}
+ char_list = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"]
+ value_list = [0, 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+ for key, value in zip (char_list, value_list):
+ hex_dict[key] = value
+
+ result = 0
+ length = len(utf8_string)
+ for i in range(length):
+ digit = utf8_string[length - 1 - i]
+ result += hex_dict[digit] * (16 ** i)
+
+ return result
+
+def get_unicode_dict():
+ unicode_dict = {}
+ utf8_list = [("621", "'"), ("622", "|"),("623", ">"),
+ ("624", "&"), ("625", "<"),("626", "}"),
+ ("627", "A"), ("628", "b"),("629", "p"),
+ ("62A", "t"), ("62B", "v"),("62C", "j"),
+ ("62D", "H"), ("62E", "x"),("62F", "d"),
+ ("630", "*"), ("631", "r"),("632", "z"),
+ ("633", "s"), ("634", "$"),("635", "S"),
+ ("636", "D"), ("637", "T"),("638", "Z"),
+ ("639", "E"), ("63A", "g"),("640", "_"),
+ ("641", "f"), ("642", "q"),("643", "k"),
+ ("644", "l"), ("645", "m"),("646", "n"),
+ ("647", "h"), ("648", "w"),("649", "Y"),
+ ("64A", "y"), ("64B", "F"),("64C", "N"),
+ ("64D", "K"), ("64E", "a"),("64F", "u"),
+ ("650", "i"), ("651", "~"),("652", "o"),
+ ("670", "`"), ("671", "{"),("67E", "P"),
+ ("686", "J"), ("6A4", "V"),("6AF", "G")]
+
+ for word_pair in utf8_list:
+ utf8 = word_pair[0]
+ char = word_pair[1]
+ unicode_dict[hex_to_decimal(utf8)] = char
+
+ return unicode_dict
+
+
+def convert(word, unicode_dict):
+ word_list = []
+ for char in word:
+ c_unicode = ord(char)
+ if c_unicode in unicode_dict:
+ word_list.append(unicode_dict[c_unicode])
+
+ return "".join(word_list)
+
+def process_arabic_text(arabic_text, unicode_dict):
+ with open(arabic_text, 'r') as file:
+ sentence_list = []
+ is_sentence = False
+ for line in file.readlines():
+#print(line.split()[0], is_sentence, line.split()[0] == "
")
+ if len(line.split()) > 0:
+ if line.split()[0] == "":
+ is_sentence = True
+
+ elif (is_sentence and line.split()[0] != "
"):
+ for word in line.split():
+ if word == '.':
+ # when meet period ".", sentence_list should not be empty (do find sentence ending with two period)
+ if (len(sentence_list) > 0):
+ sentence = " ".join(sentence_list)
+ print(sentence)
+ sentence_list = []
+ elif word[-1] == ".":
+ word = word[:-1]
+ sentence_list.append(word)
+ sentence = " ".join(sentence_list)
+ print(sentence)
+ sentence_list = []
+ else:
+ word = word
+ if word != '':
+ sentence_list.append(word)
+
+ if line.split()[0] == "
":
+ is_sentence = False
+ if (len(sentence_list) > 0):
+ print(" ".join(sentence_list))
+ sentence_list = []
+
+
+
+def main():
+ arabic_text = sys.argv[1]
+ unicode_dict = get_unicode_dict()
+ process_arabic_text(arabic_text, unicode_dict)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/gale_arabic/s5d/local/bad_segments b/egs/gale_arabic/s5d/local/bad_segments
new file mode 100644
index 00000000000..c3413f0714c
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/bad_segments
@@ -0,0 +1,10 @@
+ARABIYA_FROMIRAQ_ARB_20070302_175801_2326286_2327450
+ARABIYA_BILARABI_ARB_20061005_201400_221375_223694
+LBC_NAHAR_ARB_20060911_142800_3683267_3685290
+LBC_NAHAR_ARB_20070303_145800_3249800_3251128
+LBC_NAHAR_ARB_20070303_145800_3623646_3624152
+LBC_NAHAR_ARB_20070305_035800_481003_484069
+ALAM_WITHEVENT_ARB_20070227_205800_3141876_3144152
+ALAM_NEWSRPT_ARB_20070130_015801_2875054_2876396
+ALJZ_TODHARV_ARB_20060914_155800_2947717_2949041
+ALJZ_TODHARV_ARB_20070107_145800_2417848_2419238
diff --git a/egs/gale_arabic/s5d/local/chain/compare_wer.sh b/egs/gale_arabic/s5d/local/chain/compare_wer.sh
new file mode 100755
index 00000000000..ece324c279e
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/compare_wer.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
+
+# ./local/chain/compare_wer.sh exp/chain/cnn1a
+# System cnn1a
+# WER 0.61
+# CER 0.15
+# Final train prob -0.0377
+# Final valid prob -0.0380
+# Final train prob (xent) -0.0830
+# Final valid prob (xent) -0.0838
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: <dir1> [<dir2> ... ]"
+ echo "e.g.: $0 exp/chain/cnn{1a,1b}"
+ exit 1
+fi
+
+echo "# $0 $*"
+used_epochs=false
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+echo -n "# WER "
+for x in $*; do
+ wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}')
+ printf "% 10s" $wer
+done
+echo
+
+echo -n "# CER "
+for x in $*; do
+ cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}')
+ printf "% 10s" $cer
+done
+echo
+
+if $used_epochs; then
+ exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (xent) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (xent) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
diff --git a/egs/gale_arabic/s5d/local/chain/run_chain_common.sh b/egs/gale_arabic/s5d/local/chain/run_chain_common.sh
new file mode 100755
index 00000000000..710625cf489
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/run_chain_common.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+
+# this script has common stages shared across librispeech chain recipes.
+# It generates a new topology in a new lang directory, gets the alignments as
+# lattices, and builds a tree for the new topology
+set -e
+
+stage=11
+
+# input directory names. These options are actually compulsory, and they have
+# been named for convenience
+gmm_dir=
+ali_dir=
+lores_train_data_dir=
+
+num_leaves=6000
+
+# output directory names. They are also compulsory.
+lang=
+lat_dir=
+tree_dir=
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+[ -z $lang ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1;
+[ -z $lat_dir ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1;
+[ -z $tree_dir ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1;
+
+for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do
+ [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
+done
+
+if [ $stage -le 11 ]; then
+ echo "$0: creating lang directory with one state per phone."
+ # Create a version of the lang/ directory that has one state per phone in the
+ # topo file. [note, it really has two states.. the first one is only repeated
+ # once, the second one has zero or more repeats.]
+ if [ -d $lang ]; then
+ if [ $lang/L.fst -nt data/lang/L.fst ]; then
+ echo "$0: $lang already exists, not overwriting it; continuing"
+ else
+ echo "$0: $lang already exists and seems to be older than data/lang..."
+ echo " ... not sure what to do. Exiting."
+ exit 1;
+ fi
+ else
+ cp -r data/lang $lang
+ silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
+ nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
+ # Use our special topology... note that later on may have to tune this
+ # topology.
+ steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
+ fi
+fi
+
+if [ $stage -le 12 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ nj=$(cat ${ali_dir}/num_jobs) || exit 1;
+ steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \
+ $lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 13 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" $num_leaves ${lores_train_data_dir} $lang $ali_dir $tree_dir
+fi
+
+exit 0;
diff --git a/egs/gale_arabic/s5d/local/chain/run_tdnn.sh b/egs/gale_arabic/s5d/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/local/chain/run_tdnn_lstm.sh b/egs/gale_arabic/s5d/local/chain/run_tdnn_lstm.sh
new file mode 120000
index 00000000000..8e647598556
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/run_tdnn_lstm.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_lstm_1a.sh
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..16e9b928714
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,211 @@
+#!/usr/bin/env bash
+
+set -e -o pipefail
+stage=0
+nj=30
+train_set=train
+test_set=dev
+gmm=tri3b # this is the source gmm-dir that we'll use for alignments; it
+ # should have alignments for the specified training data.
+num_threads_ubm=32
+nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium.
+
+# Options which are not passed through to run_ivector_common.sh
+affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration.
+common_egs_dir=
+reporting_email=
+
+# LSTM/chain options
+train_stage=-10
+xent_regularize=0.1
+dropout_schedule='0,0@0.20,0.5@0.50,0'
+
+# training chunk-options
+chunk_width=150,110,100
+get_egs_stage=-10
+
+# training options
+srand=0
+remove_egs=true
+run_ivector_common=true
+run_chain_common=true
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ linear-component name=prefinal-l dim=256 $linear_opts
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 16 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/chain/train.py --stage $train_stage \
+ --cmd "$decode_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient 0.1 \
+ --chain.l2-regularize 0.0 \
+ --chain.apply-deriv-weights false \
+ --chain.lm-opts="--num-extra-lm-states=2000" \
+ --trainer.dropout-schedule $dropout_schedule \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs 6 \
+ --trainer.frames-per-iter 1500000 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 16 \
+ --trainer.optimization.initial-effective-lrate 0.00025 \
+ --trainer.optimization.final-effective-lrate 0.000025 \
+ --trainer.num-chunk-per-minibatch=64,32 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --egs.chunk-width=$chunk_width \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts "--frames-overlap-per-eg 0 --constrained false" \
+ --egs.stage $get_egs_stage \
+ --reporting.email="$reporting_email" \
+ --cleanup.remove-egs=$remove_egs \
+ --feat-dir=$train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$lat_dir \
+ --dir $dir || exit 1;
+
+fi
+
+if [ $stage -le 17 ]; then
+ # The reason we are using data/lang here, instead of $lang, is just to
+ # emphasize that it's not actually important to give mkgraph.sh the
+ # lang directory with the matched topology (since it gets the
+ # topology file from the model). So you could give it a different
+ # lang directory, one that contained a wordlist and LM of your choice,
+ # as long as phones.txt was compatible.
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang_test/phones.txt $lang/phones.txt
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 data/lang_test_srilm \
+ $tree_dir $tree_dir/graph || exit 1;
+fi
+
+if [ $stage -le 18 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+ test_set=mt_all
+
+ steps/nnet3/decode.sh \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --extra-left-context 0 --extra-right-context 0 \
+ --extra-left-context-initial 0 \
+ --extra-right-context-final 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" --num-threads 4 \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test_set}_hires \
+ $tree_dir/graph data/${test_set}_hires ${dir}/decode_${test_set} || exit 1
+fi
diff --git a/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_lstm_1a.sh
new file mode 100755
index 00000000000..4273e83835a
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -0,0 +1,221 @@
+#!/usr/bin/env bash
+
+#started from tedlium recipe with few edits
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+chunk_left_context=40
+chunk_right_context=0
+label_delay=5
+xent_regularize=0.025
+train_set=train
+gmm=tri3b # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# decode options
+extra_left_context=40
+extra_right_context=0
+frames_per_chunk=150
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1a #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir= # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat <data/lang_chain/topo
+ fi
+fi
+
+if [ $stage -le 15 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 16 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" 7000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+ mkdir -p $dir
+ echo "$0: creating neural net configs using the xconfig parser";
+
+ num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+ learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-renorm-layer name=tdnn1 dim=1024
+ relu-renorm-layer name=tdnn2 dim=1024 input=Append(-1,0,1)
+ fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+ relu-renorm-layer name=tdnn3 dim=1024 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn4 dim=1024 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+ relu-renorm-layer name=tdnn5 dim=1024 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn6 dim=1024 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+
+ ## adding the layers for chain branch
+ output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+ # adding the layers for xent branch
+ # This block prints the configs for a separate output that will be
+ # trained with a cross-entropy objective in the 'chain' models... this
+ # has the effect of regularizing the hidden parts of the model. we use
+ # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+ # 0.5 / args.xent_regularize is suitable as it means the xent
+ # final-layer learns at a rate independent of the regularization
+ # constant; and the 0.5 was tuned so as to make the relative progress
+ # similar in the xent and regular final layers.
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 18 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/chain/train.py --stage $train_stage \
+ --cmd "$decode_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient 0.1 \
+ --chain.l2-regularize 0.00005 \
+ --chain.apply-deriv-weights false \
+ --chain.lm-opts="--num-extra-lm-states=2000" \
+ --egs.dir "$common_egs_dir" \
+ --egs.opts "--frames-overlap-per-eg 0" \
+ --egs.chunk-width "$frames_per_chunk" \
+ --egs.chunk-left-context "$chunk_left_context" \
+ --egs.chunk-right-context "$chunk_right_context" \
+ --trainer.num-chunk-per-minibatch 64,32 \
+ --trainer.frames-per-iter 1500000 \
+ --trainer.max-param-change 2.0 \
+ --trainer.num-epochs 6 \
+ --trainer.deriv-truncate-margin 10 \
+ --trainer.optimization.shrink-value 0.99 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 12 \
+ --trainer.optimization.initial-effective-lrate 0.001 \
+ --trainer.optimization.final-effective-lrate 0.0001 \
+ --trainer.optimization.momentum 0.0 \
+ --cleanup.remove-egs true \
+ --feat-dir $train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir $lat_dir \
+ --dir $dir
+fi
+
+
+
+if [ $stage -le 19 ]; then
+ # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
+ # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+ # the lang directory.
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test_3_no_al $dir $dir/graph
+fi
+
+if [ $stage -le 20 ]; then
+ steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" --stage 3\
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --extra-left-context $extra_left_context \
+ --extra-right-context $extra_right_context \
+ --frames-per-chunk "$frames_per_chunk" \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \
+ --scoring-opts "--min-lmwt 5 --max_lmwt 15" \
+ $dir/graph data/test_hires $dir/decode_bn || exit 1;
+fi
+exit 0
diff --git a/egs/gale_arabic/s5d/local/check_tools.sh b/egs/gale_arabic/s5d/local/check_tools.sh
new file mode 100755
index 00000000000..448a6536946
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/check_tools.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# check whether bs4 and lxml is installed
+if ! python3 -c "import bs4" 2>/dev/null; then
+ echo "$0: BeautifulSoup4 not installed, you can install it by 'pip install beautifulsoup4' if you prefer to use python to process xml file"
+ exit 1;
+fi
+
+if ! python3 -c "import lxml" 2>/dev/null; then
+ echo "$0: lxml not installed, you can install it by 'pip install lxml' if you prefer to use python to process xml file"
+ exit 1;
+fi
+
+echo "both BeautifulSoup4 and lxml are installed in python"
+exit 0
diff --git a/egs/gale_arabic/s5d/local/check_vocab.py b/egs/gale_arabic/s5d/local/check_vocab.py
new file mode 100755
index 00000000000..57ec32285bf
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/check_vocab.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+
+import sys
+
+def get_vocab_set(ref_file):
+ vocab_set = set()
+ with open(ref_file, 'r') as f:
+ for line in f.readlines():
+ word = line.split()[0]
+ vocab_set.add(word)
+ return vocab_set
+
+
+def compare(vocab_set, wordlist):
+ with open(wordlist, 'r') as f:
+ for line in f.readlines():
+ word = line.split()[0]
+ if word not in vocab_set:
+ print(word)
+
+def main():
+ ref_file = sys.argv[1]
+ wordlist = sys.argv[2]
+ vocab_set = get_vocab_set(ref_file)
+ compare(vocab_set, wordlist)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/gale_arabic/s5d/local/eng2arabic.pl b/egs/gale_arabic/s5d/local/eng2arabic.pl
new file mode 100755
index 00000000000..2fea8d33211
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/eng2arabic.pl
@@ -0,0 +1,108 @@
+#!/usr/bin/env perl
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+use warnings;
+use strict;
+use Encode;
+use utf8;
+
+
+
+if (@ARGV !=2 )
+ {#
+ print "usage: $0 <inFile> <outFile>\n";
+ exit (1);
+ }
+
+# <\check usage>
+my $inFile = shift (@ARGV);
+my $ouFile = shift(@ARGV);
+
+
+open INFILE, "<$inFile" || die "unable to open the input file $inFile\n";
+binmode INFILE, ":encoding(utf8)";
+
+
+open OUTPUTFILE, ">$ouFile" or die "unable to open the output mlf file $ouFile\n";
+binmode OUTPUTFILE, ":encoding(utf8)";
+
+while (<INFILE>) {
+ my $BW = convertUTF8ToBuckwalter ($_);
+ print OUTPUTFILE "$BW";
+}
+close INFILE;
+close OUTPUTFILE;
+
+
+
+# this function is copied from MADATools.pm: MADA Tools
+ sub convertUTF8ToBuckwalter {
+
+ my ($line)= (@_);
+ $line =~ s/\'/\x{0621}/g; ## HAMZA
+ $line =~ s/\|/\x{0622}/g; ## ALEF WITH MADDA ABOVE
+ $line =~ s/\>/\x{0623}/g; ## ALEF WITH HAMZA ABOVE
+ $line =~ s/\&/\x{0624}/g; ## WAW WITH HAMZA ABOVE
+ $line =~ s/\</\x{0625}/g; ## ALEF WITH HAMZA BELOW
+ $line =~ s/\}/\x{0626}/g; ## YEH WITH HAMZA ABOVE
+ $line =~ s/A/\x{0627}/g; ## ALEF
+ $line =~ s/b/\x{0628}/g; ## BEH
+ $line =~ s/p/\x{0629}/g; ## TEH MARBUTA
+ $line =~ s/t/\x{062A}/g; ## TEH
+ $line =~ s/v/\x{062B}/g; ## THEH
+ $line =~ s/j/\x{062C}/g; ## JEEM
+ $line =~ s/H/\x{062D}/g; ## HAH
+ $line =~ s/x/\x{062E}/g; ## KHAH
+ $line =~ s/d/\x{062F}/g; ## DAL
+ $line =~ s/\*/\x{0630}/g; ## THAL
+ $line =~ s/r/\x{0631}/g; ## REH
+ $line =~ s/z/\x{0632}/g; ## ZAIN
+ $line =~ s/s/\x{0633}/g; ## SEEN
+ $line =~ s/\$/\x{0634}/g; ## SHEEN
+ $line =~ s/S/\x{0635}/g; ## SAD
+ $line =~ s/D/\x{0636}/g; ## DAD
+ $line =~ s/T/\x{0637}/g; ## TAH
+ $line =~ s/Z/\x{0638}/g; ## ZAH
+ $line =~ s/E/\x{0639}/g; ## AIN
+ $line =~ s/g/\x{063A}/g; ## GHAIN
+ $line =~ s/_/\x{0640}/g; ## TATWEEL
+ $line =~ s/f/\x{0641}/g; ## FEH
+ $line =~ s/q/\x{0642}/g; ## QAF
+ $line =~ s/k/\x{0643}/g; ## KAF
+ $line =~ s/l/\x{0644}/g; ## LAM
+ $line =~ s/m/\x{0645}/g; ## MEEM
+ $line =~ s/n/\x{0646}/g; ## NOON
+ $line =~ s/h/\x{0647}/g; ## HEH
+ $line =~ s/w/\x{0648}/g; ## WAW
+ $line =~ s/Y/\x{0649}/g; ## ALEF MAKSURA
+ $line =~ s/y/\x{064A}/g; ## YEH
+
+ ## Diacritics
+ $line =~ s/F/\x{064B}/g; ## FATHATAN
+ $line =~ s/N/\x{064C}/g; ## DAMMATAN
+ $line =~ s/K/\x{064D}/g; ## KASRATAN
+ $line =~ s/a/\x{064E}/g; ## FATHA
+ $line =~ s/u/\x{064F}/g; ## DAMMA
+ $line =~ s/i/\x{0650}/g; ## KASRA
+ $line =~ s/\~/\x{0651}/g; ## SHADDA
+ $line =~ s/o/\x{0652}/g; ## SUKUN
+ $line =~ s/\`/\x{0670}/g; ## SUPERSCRIPT ALEF
+
+ $line =~ s/\{/\x{0671}/g; ## ALEF WASLA
+ $line =~ s/P/\x{067E}/g; ## PEH
+ $line =~ s/J/\x{0686}/g; ## TCHEH
+ $line =~ s/V/\x{06A4}/g; ## VEH
+ $line =~ s/G/\x{06AF}/g; ## GAF
+
+
+ ## Punctuation should really be handled by the utf8 cleaner or other method
+
+
+
+
+
+
+ return $line;
+}
diff --git a/egs/gale_arabic/s5d/local/gale_train_lms.sh b/egs/gale_arabic/s5d/local/gale_train_lms.sh
new file mode 100755
index 00000000000..be0b4ad8f79
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/gale_train_lms.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+# Copyright 2013 Arnab Ghoshal
+# Johns Hopkins University (author: Daniel Povey)
+# 2014 Guoguo Chen
+# 2019 Dongji Gao
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# To be run from one directory above this script.
+
+# Begin configuration section.
+weblm=
+# end configuration sections
+
+help_message="Usage: $0 [options] [giga-dirs]
+Train language models for GALE Arabic, and optionally for Gigaword.\n
+options:
+ --help # print this message and exit
+";
+
+. utils/parse_options.sh
+
+if [ $# -lt 3 ]; then
+ printf "$help_message\n";
+ exit 1;
+fi
+
+text=$1 # data/local/train/text
+lexicon=$2 # data/local/dict/lexicon.txt
+dir=$3 # data/local/lm
+
+shift 3
+giga_dirs=( $@ )
+
+for f in "$text" "$lexicon"; do
+ [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
+done
+
+loc=`which ngram-count`;
+if [ -z $loc ]; then
+ if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
+ sdir=`pwd`/../../../tools/srilm/bin/i686-m64
+ else
+ sdir=`pwd`/../../../tools/srilm/bin/i686
+ fi
+ if [ -f $sdir/ngram-count ]; then
+ echo Using SRILM tools from $sdir
+ export PATH=$PATH:$sdir
+ else
+ echo You appear to not have SRILM tools installed, either on your path,
+ echo or installed in $sdir. See tools/install_srilm.sh for installation
+ echo instructions.
+ exit 1
+ fi
+fi
+
+stage=0
+
+set -o errexit
+mkdir -p $dir
+export LC_ALL=C
+
+heldout_sent=10000
+cut -d' ' -f2- $text | gzip -c > $dir/train.all.gz
+cut -d' ' -f2- $text | tail -n +$heldout_sent | gzip -c > $dir/train.gz
+cut -d' ' -f2- $text | head -n $heldout_sent > $dir/heldout
+
+cut -d' ' -f1 $lexicon > $dir/wordlist
+
+if [ $stage -le 1 ]; then
+ # Trigram language model
+ echo "training tri-gram lm"
+ smoothing="kn"
+ ngram-count -text $dir/train.gz -order 3 -limit-vocab -vocab $dir/wordlist \
+ -unk -map-unk "" -${smoothing}discount -interpolate -lm $dir/gale.o3g.${smoothing}.gz
+ echo "PPL for GALE Arabic trigram LM:"
+ ngram -unk -lm $dir/gale.o3g.${smoothing}.gz -ppl $dir/heldout
+ ngram -unk -lm $dir/gale.o3g.${smoothing}.gz -ppl $dir/heldout -debug 2 >& $dir/3gram.${smoothing}.ppl2
+
+ # 4gram language model
+ echo "training 4-gram lm"
+ ngram-count -text $dir/train.gz -order 4 -limit-vocab -vocab $dir/wordlist \
+ -unk -map-unk "" -${smoothing}discount -interpolate -lm $dir/gale.o4g.${smoothing}.gz
+ echo "PPL for GALE Arabic 4gram LM:"
+ ngram -unk -lm $dir/gale.o4g.${smoothing}.gz -ppl $dir/heldout
+ ngram -unk -lm $dir/gale.o4g.${smoothing}.gz -ppl $dir/heldout -debug 2 >& $dir/4gram.${smoothing}.ppl2
+fi
+
+if [ ! -z $giga_dirs ]; then
+ mkdir -p $dir/giga
+ if [ ! -f $giga_dirs/text.2000k ]; then
+ echo "Arabic Gigaword text not found, prepare it"
+ local/prepare_giga.sh $giga_dirs
+ fi
+
+ cp $giga_dirs/text.2000k $dir/giga
+ cat $dir/giga/text.2000k | gzip -c > $dir/giga/text2000k.gz
+
+ for x in 3 4; do
+ smoothing="kn"
+ ngram-count -text $dir/giga/text2000k.gz -order $x -limit-vocab \
+ -vocab $dir/wordlist -unk -map-unk "" -${smoothing}discount -interpolate \
+ -lm $dir/giga/giga.o${x}g.${smoothing}.gz
+ echo "PPL for Gigaword ${x}gram LM:"
+ ngram -unk -lm $dir/giga/giga.o${x}g.${smoothing}.gz -ppl $dir/heldout
+ ngram -unk -lm $dir/giga/giga.o${x}g.${smoothing}.gz -ppl $dir/heldout -debug 2 \
+ >& $dir/giga/${x}gram.${smoothing}.ppl2
+ compute-best-mix $dir/${x}gram.${smoothing}.ppl2 \
+ $dir/giga/${x}gram.${smoothing}.ppl2 >& $dir/gale_giga_mix.${x}gram.${smoothing}.log
+ grep 'best lambda' $dir/gale_giga_mix.${x}gram.${smoothing}.log | perl -e '
+ $_=<>;
+ s/.*\(//; s/\).*//;
+ @A = split;
+ die "Expecting 2 numbers; found: $_" if(@A!=2);
+ print "$A[0]\n$A[1]\n";' > $dir/gale_giga_mix.${x}gram.${smoothing}.weights
+ gale_weight=$(head -1 $dir/gale_giga_mix.${x}gram.${smoothing}.weights)
+ giga_weight=$(tail -n 1 $dir/gale_giga_mix.${x}gram.${smoothing}.weights)
+ ngram -order $x -lm $dir/gale.o${x}g.${smoothing}.gz -lambda $gale_weight \
+ -mix-lm $dir/giga/giga.o${x}g.${smoothing}.gz \
+ -unk -write-lm $dir/gale_giga.o${x}g.${smoothing}.gz
+ echo "PPL for GALE + Gigaword ${x}gram LM:"
+ ngram -unk -lm $dir/gale_giga.o${x}g.${smoothing}.gz -ppl $dir/heldout
+ done
+fi
diff --git a/egs/gale_arabic/s5d/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5d/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..31c58ef06b4
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,182 @@
+#!/usr/bin/env bash
+
+set -e -o pipefail
+
+# This script is called from scripts like local/nnet3/run_tdnn.sh and
+# local/chain/run_tdnn.sh (and may eventually be called by more scripts). It
+# contains the common feature preparation and iVector-related parts of the
+# script. See those scripts for examples of usage.
+
+
+stage=0
+nj=100
+train_set=train # you might set this to e.g. train.
+test_sets="test"
+gmm=tri3b # This specifies a GMM-dir from the features of the type you're training the system on;
+ # it should contain alignments for 'train_set'.
+
+num_threads_ubm=32
+nnet3_affix= # affix for exp/nnet3 directory to put iVector stuff
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+
+gmm_dir=exp/${gmm}
+ali_dir=exp/${gmm}_ali_${train_set}_sp
+
+for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do
+ if [ ! -f $f ]; then
+ echo "$0: expected file $f to exist"
+ exit 1
+ fi
+done
+
+
+
+if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then
+ echo "$0: data/${train_set}_sp_hires/feats.scp already exists."
+ echo " ... Please either remove it, or rerun this script with stage > 2."
+ exit 1
+fi
+
+
+if [ $stage -le 1 ]; then
+ echo "$0: preparing directory for speed-perturbed data"
+ utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating high-resolution MFCC features"
+
+ # this shows how you can split across multiple file-systems. we'll split the
+ # MFCC dir across multiple locations. You might want to be careful here, if you
+ # have multiple copies of Kaldi checked out and run the same recipe, not to let
+ # them overwrite each other.
+ mfccdir=data/${train_set}_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/gale_arabic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" --write-utt2dur false data/${datadir}_hires
+ steps/compute_cmvn_stats.sh data/${datadir}_hires
+ utils/fix_data_dir.sh data/${datadir}_hires
+ done
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: computing a subset of data to train the diagonal UBM."
+ mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
+ temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
+
+ # train a diagonal UBM using a subset of about a quarter of the data
+ num_utts_total=$(wc -l /dev/null || true
+ steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \
+ --extra-left-context $extra_left_context \
+ --extra-right-context $extra_right_context \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \
+ ${graph_dir} data/test_hires ${dir}/decode || exit 1
+ steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+ data/test_hires ${dir}/decode_test ${dir}/decode_test_rescore || exit 1
+fi
+
+exit 0;
diff --git a/egs/gale_arabic/s5d/local/nnet3/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5d/local/nnet3/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..c624d4e8535
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/nnet3/tuning/run_tdnn_1a.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+
+# started from tedlium recipe with few edits
+
+set -e -o pipefail -u
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+train_set=train
+gmm=tri2b # this is the source gmm-dir for the data-type of interest; it
+ # should have alignments for the specified training data.
+num_threads_ubm=32
+nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned
+tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the configuration.
+
+# Options which are not passed through to run_ivector_common.sh
+train_stage=-10
+splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0"
+remove_egs=true
+relu_dim=850
+num_epochs=3
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat < \n";
+ exit (1);
+ }
+
+# <\check usage>
+my $inFile = shift (@ARGV);
+my $ouFile = shift(@ARGV);
+
+
+open INFILE, "<$inFile" || die "unable to open the input file $inFile\n";
+binmode INFILE, ":encoding(utf8)";
+
+
+open OUTPUTFILE, ">$ouFile" or die "unable to open the output mlf file $ouFile\n";
+binmode OUTPUTFILE, ":encoding(utf8)";
+
+
+while () {
+ s/[^اأإآبتثجحخدذرزسشصضطظعغفقكلمنهويىئءؤة0-9]+/ /g; ## Removes non Arabic or numbers
+# s/[^0-9]/ /g;
+# $_ =~ s/[^اأإآبتثجحخدذرزسشصضطظعغفقكلمنهويىئءؤة0-9]+/ /g; ## Removes non Arabic or numbers
+# s/[0-9]+//g;
+ my $BW = convertUTF8ToBuckwalter ($_);
+ print OUTPUTFILE "$BW"."\n";
+}
+close INFILE;
+close OUTPUTFILE;
+
+
+
+# this function is copied from MADATools.pm: MADA Tools
+ sub convertUTF8ToBuckwalter {
+
+ my ($line)= (@_);
+#$line = $UTF8_ENCODING_OBJ->decode($line); ## Same as Encode::decode("utf8",$line), but faster since object already created
+#$line =~ s/[^اأإآبتثجحخدذرزسشصضطظعغفقكلمنهويىئءؤة0-9]+//g; ## Removes non Arabic or numbers
+# $line =~ s/[0-9]//g;
+ $line =~ s/\x{0621}/\'/g; ## HAMZA
+ $line =~ s/\x{0622}/\|/g; ## ALEF WITH MADDA ABOVE
+ $line =~ s/\x{0623}/\>/g; ## ALEF WITH HAMZA ABOVE
+ $line =~ s/\x{0624}/\&/g; ## WAW WITH HAMZA ABOVE
+ $line =~ s/\x{0625}/\ ${arabic_giga_dir}/${dest_file}.orig
+#done
+
+#for x in $arabic_giga_dir/*.orig; do
+# echo "Processing $x"
+# local/arabic_convert.py $x > ${x}.mid
+#done
+
+for x in $arabic_giga_dir/*.mid; do
+ echo "Processing $x"
+ local/normalize_transcript_BW.pl $x ${x}.norm
+done
diff --git a/egs/gale_arabic/s5d/local/prepare_data.sh b/egs/gale_arabic/s5d/local/prepare_data.sh
new file mode 100755
index 00000000000..d09ff00acd0
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_data.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+# GALE Arabic phase 2 Conversation Speech
+dir1=/export/corpora/LDC/LDC2013S02/
+dir2=/export/corpora/LDC/LDC2013S07/
+text1=/export/corpora/LDC/LDC2013T04/
+text2=/export/corpora/LDC/LDC2013T17/
+# GALE Arabic phase 2 News Speech
+dir3=/export/corpora/LDC/LDC2014S07/
+dir4=/export/corpora/LDC/LDC2015S01/
+text3=/export/corpora/LDC/LDC2014T17/
+text4=/export/corpora/LDC/LDC2015T01/
+# GALE Arabic phase 3 Conversation Speech
+dir5=/export/corpora/LDC/LDC2015S11/
+dir6=/export/corpora/LDC/LDC2016S01/
+text5=/export/corpora/LDC/LDC2015T16/
+text6=/export/corpora/LDC/LDC2016T06/
+# GALE Arabic phase 3 News Speech
+dir7=/export/corpora/LDC/LDC2016S07/
+dir8=/export/corpora/LDC/LDC2017S02/
+text7=/export/corpora/LDC/LDC2016T17/
+text8=/export/corpora/LDC/LDC2017T04/
+# GALE Arabic phase 4 Conversation Speech
+dir9=/export/corpora/LDC/LDC2017S15/
+text9=/export/corpora/LDC/LDC2017T12/
+# GALE Arabic phase 4 News Speech
+dir10=/export/corpora/LDC/LDC2018S05/
+text10=/export/corpora/LDC/LDC2018T14/
+
+mgb2_dir=""
+process_xml=""
+mer=80
+
+. ./utils/parse_options.sh
+
+gale_data=GALE
+
+mkdir -p $gale_data
+# check that sox is installed
+which sox &>/dev/null
+if [[ $? != 0 ]]; then
+ echo "$0: sox is not installed"; exit 1
+fi
+
+for dvd in $dir1 $dir2 $dir3 $dir4 $dir5 $dir6 $dir7 $dir8 $dir9 $dir10; do
+ dvd_full_path=$(utils/make_absolute.sh $dvd)
+ if [[ ! -e $dvd_full_path ]]; then
+ echo "$0: missing $dvd_full_path"; exit 1;
+ fi
+ find $dvd_full_path \( -name "*.wav" -o -name "*.flac" \) | while read file; do
+ id=$(basename $file | awk '{gsub(".wav","");gsub(".flac","");print}')
+ echo "$id sox $file -r 16000 -t wav - |"
+ done
+done | sort -u > $gale_data/wav.scp
+echo "$0:data prep audio succeded"
+
+gale_data=$(utils/make_absolute.sh "GALE" );
+top_pwd=`pwd`
+txtdir=$gale_data/txt
+mkdir -p $txtdir; cd $txtdir
+
+for cdx in $text1 $text2 $text3 $text4 $text5 $text6 $text7 $text8 $text9 $text10; do
+ echo "$0:Preparing $cdx"
+ if [[ $cdx == *.tgz ]] ; then
+ tar -xvf $cdx
+ elif [ -d "$cdx" ]; then
+ ln -s $cdx `basename $cdx`
+ else
+ echo "$0:I don't really know what I shall do with $cdx " >&2
+ fi
+done
+
+find -L . -type f -name "*.tdf" | while read file; do
+sed '1,3d' $file # delete the first 3 lines
+done > all.tmp$$
+
+perl -e '
+ ($inFile,$idFile,$txtFile)= split /\s+/, $ARGV[0];
+ open(IN, "$inFile");
+ open(ID, ">$idFile");
+ open(TXT, ">$txtFile");
+ while () {
+ @arr= split /\t/,$_;
+ $start=sprintf ("%0.3f",$arr[2]);$rStart=$start;$start=~s/\.//; $start=~s/^0+$/0/; $start=~s/^0+([^0])/$1/; # remove zeros at the beginning
+ $end=sprintf ("%0.3f",$arr[3]);$rEnd=$end;$end=~s/^0+([^0])/$1/;$end=~s/\.//;
+ if ( ($arr[11] !~ m/report/) && ($arr[11] !~ m/conversational/) ){$arr[11]="UNK";}
+ $id="$arr[11] $arr[0] $arr[0]_${start}_${end} $rStart $rEnd\n";
+ next if ($rStart == $rEnd);
+ $id =~ s/.sph//g;
+ print ID $id;
+ print TXT "$arr[7]\n";
+ }' "all.tmp$$ allid.tmp$$ contentall.tmp$$"
+
+perl ${top_pwd}/local/normalize_transcript_BW.pl contentall.tmp$$ contentall.buck.tmp$$
+paste allid.tmp$$ contentall.buck.tmp$$ | sed 's: $::' | awk '{if (NF>5) {print $0}}' > all_1.tmp$$
+
+
+awk '{$1="";print $0}' all_1.tmp$$ | sed 's:^ ::' > $gale_data/all
+awk '{if ($1 == "report") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $gale_data/report
+awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $gale_data/conversational
+
+cd ..;
+rm -fr $txtdir
+cd $top_pwd
+
+# prepare MGB2 data
+if [ ! -z $mgb2_dir ]; then
+ echo "preparing MGB2 data"
+
+ xmldir=$mgb2_dir/train/xml/bw
+ output_dir=$gale_data/mgb2
+ mkdir -p $output_dir
+
+ if [ -f $output_dir/wav.scp ]; then
+ mkdir -p $output_dir/.backup
+ mv $output_dir/wav.scp ${output_dir}/.backup
+ mv $output_dir/mgb2 ${output_dir}/.backup
+ fi
+
+ if [ $process_xml == 'python' ]; then
+ echo "using python to process xml file"
+ # check if bs4 and lxml are installed in python
+ local/check_tools.sh
+ ls $mgb2_dir/train/wav/ | while read name; do
+ basename=`basename -s .wav $name`
+ [ ! -e $xmldir/$basename.xml ] && echo "Missing $xmldir/$basename.xml" && exit 1
+ local/process_xml.py $xmldir/$basename.xml - | local/add_to_datadir.py $basename $output_dir $mer
+ echo $basename $mgb2_dir/train/wav/$basename.wav >> $output_dir/wav.scp
+ done
+ elif [ $process_xml == 'xml' ]; then
+ # check if xml binary exsits
+ if command -v xml >/dev/null 2>/dev/null; then
+ echo "using xml"
+ ls $mgb2_dir/train/wav/ | while read name; do
+ basename=`basename -s .wav $name`
+ [ ! -e $xmldir/$basename.xml ] && echo "Missing $xmldir/$basename.xml" && exit 1
+ xml sel -t -m '//segments[@annotation_id="transcript_align"]' -m "segment" -n -v "concat(@who,' ',@starttime,' ',@endtime,' ',@WMER,' ')" -m "element" -v "concat(text(),' ')" $xmldir/$basename.xml | local/add_to_datadir.py $basename $output_dir $mer
+ echo $basename $mgb2_dir/train/wav/$basename.wav >> $output_dir/wav.scp
+ done
+ else
+ echo "xml not found, you may use python by '--process-xml python'"
+ exit 1;
+ fi
+ else
+ # invalid option
+ echo "$0: invalid option for --process-xml, choose from 'xml' or 'python'"
+ exit 1;
+ fi
+
+ # add mgb2 data to training data (GALE/all and wav.scp)
+ mv $gale_data/all $gale_data/all.gale
+ cat $gale_data/all.gale $output_dir/mgb2 > $gale_data/all
+ cat $output_dir/wav.scp >> $gale_data/wav.scp
+
+ # for dict preparation
+ grep -v -f local/test/dev_all $gale_data/all.gale | \
+ grep -v -f local/test/test_p2 | \
+ grep -v -f local/test/mt_eval_all | \
+ grep -v -f local/bad_segments > $gale_data/all.gale.train
+ awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $gale_data/all.gale.train | sort -u > $gale_data/gale_text
+echo "$0:MGB2 data added to training data"
+fi
+
+
+echo "$0:data prep text succeeded"
+
+mkdir -p data
+dir=$(utils/make_absolute.sh data/)
+grep -f local/test/dev_all $gale_data/all | grep -v -f local/bad_segments > $gale_data/all.dev
+grep -f local/test/test_p2 $gale_data/all | grep -v -f local/bad_segments > $gale_data/all.test_p2
+grep -f local/test/mt_eval_all $gale_data/all | grep -v -f local/bad_segments > $gale_data/all.mt_all
+grep -v -f local/test/dev_all $gale_data/all | \
+ grep -v -f local/test/test_p2 | \
+ grep -v -f local/test/mt_eval_all | \
+ grep -v -f local/bad_segments > $gale_data/all.train
+
+for x in dev test_p2 mt_all train; do
+ outdir=data/$x
+ file=$gale_data/all.$x
+ mkdir -p $outdir
+ awk '{print $2 " " $2}' $file | sort -u > $outdir/utt2spk
+ cp -pr $outdir/utt2spk $outdir/spk2utt
+ awk '{print $2 " " $1 " " $3 " " $4}' $file | sort -u > $outdir/segments
+ awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $file | sort -u > $outdir/text
+done
+
+grep -f local/test/dev_all $gale_data/wav.scp > $dir/dev/wav.scp
+grep -f local/test/test_p2 $gale_data/wav.scp > $dir/test_p2/wav.scp
+grep -f local/test/mt_eval_all $gale_data/wav.scp > $dir/mt_all/wav.scp
+
+cat $gale_data/wav.scp | awk -v seg=$dir/train/segments 'BEGIN{while((getline0) {seen[$2]=1;}}
+ {if (seen[$1]) { print $0}}' > $dir/train/wav.scp
+
+echo "$0:data prep split succeeded"
+exit 0
diff --git a/egs/gale_arabic/s5d/local/prepare_dict.sh b/egs/gale_arabic/s5d/local/prepare_dict.sh
new file mode 100755
index 00000000000..31bae09ff31
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_dict.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+# Copyright 2017 QCRI (author: Ahmed Ali)
+# Apache 2.0
+# This script prepares the dictionary.
+
+set -e
+dir=data/local/dict
+lexicon_url1="http://alt.qcri.org//resources/speech/dictionary/ar-ar_grapheme_lexicon_2016-02-09.bz2";
+lexicon_url2="http://alt.qcri.org//resources/speech/dictionary/ar-ar_lexicon_2014-03-17.txt.bz2";
+stage=0
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh || exit 1;
+mkdir -p $dir data/local/lexicon_data
+
+if [ $stage -le 0 ]; then
+ echo "$0: Downloading text for lexicon... $(date)."
+# wget -P data/local/lexicon_data $lexicon_url1
+# wget -P data/local/lexicon_data $lexicon_url2
+# bzcat data/local/lexicon_data/ar-ar_grapheme_lexicon_2016-02-09.bz2 | sed '1,3d' | awk '{print $1}' > data/local/lexicon_data/grapheme_lexicon
+# bzcat data/local/lexicon_data/ar-ar_lexicon_2014-03-17.txt.bz2 | sed '1,3d' | awk '{print $1}' >> data/local/lexicon_data/grapheme_lexicon
+ gale_data=GALE
+ text=data/train/text
+ [ -f $gale_data/gale_text ] && text=$gale_data/gale_text
+ echo "text is $text"
+ cat $text | cut -d ' ' -f 2- | tr -s " " "\n" | sort -u >> data/local/lexicon_data/grapheme_lexicon
+fi
+
+
+if [ $stage -le 0 ]; then
+ echo "$0: processing lexicon text and creating lexicon... $(date)."
+ # remove vowels and rare alef wasla
+ grep -v [0-9] data/local/lexicon_data/grapheme_lexicon | sed -e 's:[FNKaui\~o\`]::g' -e 's:{:}:g' | sort -u > data/local/lexicon_data/processed_lexicon
+ local/prepare_lexicon.py
+fi
+
+cut -d' ' -f2- $dir/lexicon.txt | sed 's/SIL//g' | tr ' ' '\n' | sort -u | sed '/^$/d' >$dir/nonsilence_phones.txt || exit 1;
+
+sed -i '1i UNK' $dir/lexicon.txt
+
+echo UNK >> $dir/nonsilence_phones.txt
+
+echo ' SIL' >> $dir/lexicon.txt
+
+echo SIL > $dir/silence_phones.txt
+
+echo SIL >$dir/optional_silence.txt
+
+echo -n "" >$dir/extra_questions.txt
+
+echo "$0: Dictionary preparation succeeded"
diff --git a/egs/gale_arabic/s5d/local/prepare_giga.sh b/egs/gale_arabic/s5d/local/prepare_giga.sh
new file mode 100755
index 00000000000..f7345803274
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_giga.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+giga_dir=$1
+
+source_dir=/export/corpora/LDC/LDC2011T11/arb_gw_5
+num=2000000
+suffix="2000k"
+
+[ ! -d $source_dir ] && echo "source Arabic Gigaword does not exist." && exit 1;
+
+[ -f $giga_dir/text ] && mv $giga_dir/text $giga_dir/text.bkp
+mkdir -p $giga_dir/
+
+find $source_dir/data/ -name "*.gz" | while read file; do
+ gunzip -c $file | local/arabic_convert.py - >> $giga_dir/text.arb
+done
+
+head -n $num $giga_dir/text.arb > $giga_dir/text.arb.${suffix}
+local/normalize_transcript_BW.pl $giga_dir/text.arb.${suffix} $giga_dir/text.${suffix}
+
+echo "finish preparing Arabic Gigaword"
+exit 0
diff --git a/egs/gale_arabic/s5d/local/prepare_lexicon.py b/egs/gale_arabic/s5d/local/prepare_lexicon.py
new file mode 100755
index 00000000000..215541585eb
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_lexicon.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+# Copyright 2018 Ashish Arora
+# Apache 2.0
+
+# This script prepares lexicon.
+
+import argparse
+import os
+
+parser = argparse.ArgumentParser(description="""Creates the list of characters and words in lexicon""")
+args = parser.parse_args()
+
+### main ###
+lex = {}
+text_path = os.path.join('data','local', 'lexicon_data', 'processed_lexicon')
+with open(text_path, 'r', encoding='utf-8') as f:
+ for line in f:
+ line = line.strip()
+ characters = list(line)
+ characters = " ".join(['V' if char == '*' else char for char in characters])
+ lex[line] = characters
+
+with open(os.path.join('data','local','dict', 'lexicon.txt'), 'w', encoding='utf-8') as fp:
+ for key in sorted(lex):
+ fp.write(key + " " + lex[key] + "\n")
diff --git a/egs/gale_arabic/s5d/local/prepare_lm.sh b/egs/gale_arabic/s5d/local/prepare_lm.sh
new file mode 100755
index 00000000000..a4f38a3da13
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_lm.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+
+# Copyright 2012 Vassil Panayotov
+# 2017 Ewald Enzinger
+# Apache 2.0
+
+. ./path.sh || exit 1
+
+echo "=== Building a language model ..."
+
+dir=data/local/lm/
+text=data/train/text
+lexicon=data/local/dict/lexicon.txt
+arabic_giga_dir=Arabic_giga
+# Language model order
+order=4
+
+. utils/parse_options.sh
+
+# Prepare a LM training corpus from the transcripts
+mkdir -p $dir
+
+for f in "$text" "$lexicon"; do
+ [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
+done
+
+loc=`which ngram-count`;
+if [ -z $loc ]; then
+ if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
+ sdir=$KALDI_ROOT/tools/srilm/bin/i686-m64
+ else
+ sdir=$KALDI_ROOT/tools/srilm/bin/i686
+ fi
+ if [ -f $sdir/ngram-count ]; then
+ echo Using SRILM tools from $sdir
+ export PATH=$PATH:$sdir
+ else
+ echo You appear to not have SRILM tools installed, either on your path,
+ echo or installed in $sdir. See tools/install_srilm.sh for installation
+ echo instructions.
+ exit 1
+ fi
+fi
+
+
+#cat Arabic_giga/text.1000000 > $dir/text.txt
+[ -f $dir/text.txt ] && rm $dir/text.txt && echo "deleted"
+cat data/train/text | cut -d " " -f2- > $dir/text.txt
+cat Arabic_giga/text.all >> $dir/text.txt
+echo "text.txt contains `wc -l $dir/text.txt` lines"
+cut -d' ' -f1 $lexicon > $dir/wordlist
+
+ngram-count -text $dir/text.txt -order $order -limit-vocab -vocab $dir/wordlist \
+ -unk -map-unk "" -kndiscount -interpolate -lm $dir/lm.$order.all.gz
+
+ngram -lm $dir/lm.$order.all.gz -ppl $dir/dev.txt
+echo "*** Finished building the LM model!"
diff --git a/egs/gale_arabic/s5d/local/prepare_lm_pocolm.sh b/egs/gale_arabic/s5d/local/prepare_lm_pocolm.sh
new file mode 100755
index 00000000000..e85f97d2faf
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_lm_pocolm.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+
+# Dongji Gao
+
+set -e
+set -o pipefail
+set -u
+
+stage=0
+
+dir=data/local/pocolm
+cmd=run.pl
+order=4
+extra_text=""
+
+. ./utils/parse_options.sh
+
+lm_dir=${dir}/data
+lm_name=10m_${order}
+
+mkdir -p $dir
+. ./path.sh
+export PATH=$KALDI_ROOT/tools/pocolm/scripts:$PATH
+
+if [ $stage -le 0 ]; then
+ mkdir -p ${dir}/data
+ mkdir -p ${dir}/data/text
+
+ echo "$0: Getting the data sources"
+
+ rm ${dir}/data/text/* 2>/dev/null || true
+
+ cat data/dev/text | cut -d ' ' -f2- > ${dir}/data/text/dev.txt
+ cat data/train/text | cut -d ' ' -f2- > ${dir}/data/text/train.txt
+ [ ! -z $extra_text ] && [ -f $extra_text ] && cp $extra_text ${dir}/data/text/giga.txt
+# cp temp/text.2000k ${dir}/data/text/arb_giga_2000k.txt
+fi
+
+if [ $stage -le 1 ]; then
+ mkdir -p ${dir}/data/work
+ if [ ! -f ${dir}/data/work/word_counts/.done ]; then
+ get_word_counts.py ${dir}/data/text ${dir}/data/work/word_counts
+ touch ${dir}/data/work/word_counts/.done
+ fi
+fi
+
+lexicon=data/local/dict/lexicon.txt
+[ ! -f $lexicon ] && echo "$0: No such file $lexicon" && exit 1;
+
+wordlist=${dir}/data/work/wordlist
+if [ $stage -le 2 ]; then
+ cut -d ' ' -f1 $lexicon > $wordlist
+ wordlist_to_vocab.py --unk-symbol="" $wordlist > ${dir}/data/work/vocab_wordlist.txt
+ touch ${dir}/data/work/.vocab_wordlist.txt.done
+fi
+
+unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm
+echo "$unpruned_lm_dir"
+
+if [ $stage -le 3 ]; then
+ echo "$0: training the unpruned LM"
+ $cmd ${unpruned_lm_dir}/log/train.log \
+ train_lm.py --wordlist=$wordlist --num-split=20 --warm-start-ratio=20 \
+ --limit-unk-history=false \
+ ${dir}/data/text $order ${lm_dir}/work ${unpruned_lm_dir}
+
+ for x in dev; do
+ $cmd ${unpruned_lm_dir}/log/compute_data_prob_${x}.log \
+ get_data_prob.py ${dir}/data/text/${x}.txt ${unpruned_lm_dir}
+ cat ${unpruned_lm_dir}/log/compute_data_prob_${x}.log | grep -F '[perplexity'
+ done
+
+ format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > pocolm/lm.$order.gz
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: pruning the LM (to larger size)"
+ size=100000000
+ prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big
+
+ for x in dev; do
+ echo "============ compute perlexity for big lm ================="
+ get_data_prob.py ${dir}/data/text/${x}.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity'
+ done
+
+ format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > pocolm/lm.prune.${order}.2big.gz
+fi
diff --git a/egs/gale_arabic/s5d/local/process_xml.py b/egs/gale_arabic/s5d/local/process_xml.py
new file mode 100755
index 00000000000..3c6eed452ac
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/process_xml.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+from bs4 import BeautifulSoup
+import sys
+import argparse
+
+def get_args():
+ parser = argparse.ArgumentParser(description="""This script process xml file.""")
+ parser.add_argument("xml", type=str, help="""Input xml file""")
+ parser.add_argument("output", type=str, help="""output text file""")
+ args = parser.parse_args()
+ return args
+
+def process_xml(xml_handle, output_handle):
+ soup = BeautifulSoup(xml_handle, "xml")
+ for segment in soup.find_all("segment"):
+ who = segment["who"]
+ starttime = segment["starttime"]
+ endtime = segment["endtime"]
+ WMER = segment["WMER"]
+ text = " ".join([element.string for element in segment.find_all("element") if element.string != None])
+ output_handle.write("{} {} {} {} {}\n".format(who, starttime, endtime, WMER, text))
+ xml_handle.close()
+ output_handle.close()
+
+def main():
+ args = get_args()
+
+ xml_handle = open(args.xml, 'r')
+ output_handle = sys.stdout if args.output == '-' else open(args.output, 'w')
+
+ process_xml(xml_handle, output_handle)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/gale_arabic/s5d/local/reorder.py b/egs/gale_arabic/s5d/local/reorder.py
new file mode 100755
index 00000000000..4255b6b642f
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/reorder.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from pathlib import Path
+
+asr_result = sys.argv[1]
+mt_dir = sys.argv[2]
+file = sys.argv[3]
+output_dir = sys.argv[4]
+
+def get_asr_dict(file):
+ asr_dict = dict()
+ with open(file, 'r') as f:
+ for line in f.readlines():
+ line_split = line.split()
+ utt_id = line_split[0]
+ sentence = " ".join(line_split[1:])
+ assert(utt_id not in asr_dict)
+ asr_dict[utt_id] = sentence
+
+ return asr_dict
+
+def get_utt_id(line_list):
+ start_time = "".join(line_list[2].split('.'))
+ start_digit = len(line_list[2].split('.')[1])
+ start_dif = 3 - start_digit
+
+ end_time = "".join(line_list[3].split('.'))
+ end_digit = len(line_list[3].split('.')[1])
+ end_dif = 3 - end_digit
+
+ if start_time == '00':
+ start_time = '0'
+ else:
+ start_time += "0" * start_dif
+ end_time += "0" * end_dif
+
+ utt_id = line_list[0] + "_" + start_time + "_" + end_time
+ return utt_id
+
+def write_result(asr_dict, file, mt_dir, output_dir):
+ # create output directory
+ output_path = Path(output_dir)
+ output_path.mkdir(exist_ok=True)
+
+ # read each file
+ with open(file, 'r') as f:
+ for line in f.readlines():
+ file_name = line.split()[0]
+ output_file_name = file_name.split('.')[0]
+ with open(mt_dir+"/"+file_name, 'r') as input_file:
+ with open(output_dir+"/"+output_file_name+".txt", 'w') as output_file:
+ for line in input_file.readlines():
+ line_split = line.split()
+ utt_id = get_utt_id(line_split)
+ if utt_id not in asr_dict:
+ print(utt_id)
+ else:
+ output_file.write(utt_id + " " + asr_dict[utt_id] + "\n")
+
+def main():
+ asr_dict = get_asr_dict(asr_result)
+ write_result(asr_dict, file, mt_dir, output_dir)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/gale_arabic/s5d/local/reverse.sh b/egs/gale_arabic/s5d/local/reverse.sh
new file mode 100755
index 00000000000..c09371b15d6
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/reverse.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+import sys
+
+text = sys.argv[1]
+output = sys.argv[2]
+
+with open(text, 'r') as text:
+ with open(output, 'w') as output:
+ for line in text.readlines():
+ sentence = list()
+ line_list = line.split()
+ line_list.reverse()
+ for word in line_list:
+ sentence.append("".join(reversed(word)))
+ output.write(" ".join(sentence)+"\n")
diff --git a/egs/gale_arabic/s5d/local/rnnlm/run_tdnn_lstm.sh b/egs/gale_arabic/s5d/local/rnnlm/run_tdnn_lstm.sh
new file mode 100755
index 00000000000..18e6b784fef
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/rnnlm/run_tdnn_lstm.sh
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+
+# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
+# 2015 Guoguo Chen
+# 2017 Hainan Xu
+# 2017 Xiaohui Zhang
+
+# This script trains LMs on the swbd LM-training data.
+
+dir=exp/rnnlm_lstm_1e
+embedding_dim=1024
+lstm_rpd=256
+lstm_nrpd=256
+stage=-10
+train_stage=-10
+
+# variables for lattice rescoring
+run_lat_rescore=true
+run_nbest_rescore=false
+
+ac_model_dir=exp/chain/tdnn_1a_sp/
+decode_dir_suffix=rnnlm_1a
+ngram_order=4 # approximate the lattice-rescoring by limiting the max-ngram-order
+ # if it's set, it merges histories in the lattice if they share
+ # the same ngram history and this prevents the lattice from
+ # exploding exponentially
+pruned_rescore=true
+
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+text=data/train/text
+giga_text=giga/text.2000k
+lexicon=data/local/dict/lexiconp.txt
+text_dir=data/rnnlm/text_nosp_1a
+mkdir -p $dir/config
+set -e
+
+for f in $text $lexicon $giga_text; do
+ [ ! -f $f ] && \
+ echo "$0: expected file $f to exist;" && exit 1
+done
+
+if [ $stage -le 0 ]; then
+ mkdir -p $text_dir
+ echo -n >$text_dir/dev.txt
+ # hold out one in every 50 lines as dev data.
+ cat $text | cut -d ' ' -f2- | awk -v text_dir=$text_dir '{if(NR%50 == 0) { print >text_dir"/dev.txt"; } else {print;}}' >$text_dir/gale.txt
+ cp $giga_text > $text_dir/giga.txt
+fi
+
+if [ $stage -le 1 ]; then
+ cp data/lang/words.txt $dir/config/
+ n=`cat $dir/config/words.txt | wc -l`
+ echo " $n" >> $dir/config/words.txt
+
+ # words that are not present in words.txt but are in the training or dev data, will be
+ # mapped to during training.
+ echo "" >$dir/config/oov.txt
+
+ cat > $dir/config/data_weights.txt <" \
+ --data-weights-file=$dir/config/data_weights.txt \
+ $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt
+
+ # choose features
+ rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \
+ --use-constant-feature=true \
+ --special-words=',,' \
+ $dir/config/words.txt > $dir/config/features.txt
+
+ cat >$dir/config/xconfig < list_decode$$
+
+#split the test set per type:
+awk '{print $2}' $galeFolder/all.test | sort -u > $galeFolder/test_id$$
+
+# generate the report test set
+awk '{print $2}' $galeFolder/report | sort -u > $galeFolder/report_id$$
+comm -1 -2 $galeFolder/test_id$$ $galeFolder/report_id$$ > $galeFolder/report.test
+
+# generate the conversational test set
+awk '{print $2}' $galeFolder/conversational | sort -u > $galeFolder/conversational_id$$
+
+comm -1 -2 $galeFolder/test_id$$ $galeFolder/conversational_id$$ > $galeFolder/conversational.test
+
+rm -fr $galeFolder/test_id$$ $galeFolder/report_id$$ $galeFolder/conversational_id$$
+
+min_lmwt=7
+max_lmwt=20
+cat list_decode$$ | while read dir; do
+ for type in report conversational; do
+ #echo "Processing: $dir $type"
+ rm -fr $dir/scoring_$type
+ cp -pr $dir/scoring $dir/scoring_$type
+ ( cd $dir/scoring_$type;
+ for x in *.tra test_filt.txt; do
+ sort -u $x > tmp$$
+ join tmp$$ $galeFolder/${type}.test > $x
+ rm -fr tmp$$
+ done
+ )
+
+utils/run.pl LMWT=$min_lmwt:$max_lmwt $dir/scoring_$type/log/score.LMWT.log \
+ cat $dir/scoring_${type}/LMWT.tra \| \
+ utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \
+ compute-wer --text --mode=present \
+ ark:$dir/scoring_${type}/test_filt.txt ark,p:- ">&" $dir/wer_${type}_LMWT
+done
+done
+
+
+time=$(date +"%Y-%m-%d-%H-%M-%S")
+echo "RESULTS generated by $USER at $time"
+
+echo "Report Results WER:"
+cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_report_* | utils/best_wer.sh; done | sort -n -k2
+
+echo "Conversational Results WER:"
+cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_conversational_* | utils/best_wer.sh; done | sort -n -k2
+
+echo "Combined Results for Reports and Conversational WER:"
+cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_?? $x/wer_?| utils/best_wer.sh; done | sort -n -k2
+
+rm list_decode$$
+
+
+
diff --git a/egs/gale_arabic/s5d/local/test/dev_all b/egs/gale_arabic/s5d/local/test/dev_all
new file mode 100644
index 00000000000..8b295b5602d
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/dev_all
@@ -0,0 +1,16 @@
+ALBAGHDADYA_BAGHDADYANEWS10_ARB_20080728_100000
+ALHURRA_NEWS10_ARB_20061005_102800
+ALJZ_NEWS15_ARB_20081210_095801
+ALURDUNYA_URDUNYANEWS_ARB_20070424_000000
+ARABIYA_PANORAMA_ARB_20090302_200000
+IRAQIYAH_ECONRPT_ARB_20081210_075801
+LBC_NEWS_ARB_20070322_195800
+SCOLA_EGYPNNSCO_ARB_20070426_035900
+YEMENTV_YEMENNEWS_ARB_20080728_130000
+ALHIWAR_FREEOPINION_ARB_20090519_180000
+ALHURRA_FREEHOUR_ARB_20080731_020000
+ALJZ_TODINTER_ARB_20070811_222800
+ARABIYA_ARABSDEBATE_ARB_20070830_210000
+ARABIYA_THIRDEYE_ARB_20070323_000000
+OMANTV_MORNCOFF_ARB_20070418_000000
+SYRIANTV_CIRCLEVT_ARB_20070813_142801
diff --git a/egs/gale_arabic/s5d/local/test/dev_bc b/egs/gale_arabic/s5d/local/test/dev_bc
new file mode 100644
index 00000000000..2a5c09250d6
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/dev_bc
@@ -0,0 +1,7 @@
+ALHIWAR_FREEOPINION_ARB_20090519_180000
+ALHURRA_FREEHOUR_ARB_20080731_020000
+ALJZ_TODINTER_ARB_20070811_222800
+ARABIYA_ARABSDEBATE_ARB_20070830_210000
+ARABIYA_THIRDEYE_ARB_20070323_000000
+OMANTV_MORNCOFF_ARB_20070418_000000
+SYRIANTV_CIRCLEVT_ARB_20070813_142801
diff --git a/egs/gale_arabic/s5d/local/test/dev_bn b/egs/gale_arabic/s5d/local/test/dev_bn
new file mode 100644
index 00000000000..d3aa6ca77be
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/dev_bn
@@ -0,0 +1,9 @@
+ALBAGHDADYA_BAGHDADYANEWS10_ARB_20080728_100000
+ALHURRA_NEWS10_ARB_20061005_102800
+ALJZ_NEWS15_ARB_20081210_095801
+ALURDUNYA_URDUNYANEWS_ARB_20070424_000000
+ARABIYA_PANORAMA_ARB_20090302_200000
+IRAQIYAH_ECONRPT_ARB_20081210_075801
+LBC_NEWS_ARB_20070322_195800
+SCOLA_EGYPNNSCO_ARB_20070426_035900
+YEMENTV_YEMENNEWS_ARB_20080728_130000
diff --git a/egs/gale_arabic/s5d/local/test/mt_eval_all b/egs/gale_arabic/s5d/local/test/mt_eval_all
new file mode 100644
index 00000000000..5a425ceb382
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/mt_eval_all
@@ -0,0 +1,22 @@
+ALAM_IRAQNOW_ARB_20070208_085800
+ALJZ_TODINTER_ARB_20070205_132800
+SYRIANTV_WEEKFILE_ARB_20070203_142800
+ALAM_WITHEVENT_ARB_20070206_205801
+ALAM_WITHEVENT_ARB_20070227_205800
+ARABIYA_ALARABIYANEWS2_ARB_20070312_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070322_000000
+ARABIYA_LATEHRNEWS_ARB_20070227_000000
+DUBAI_DUBAINEWS2_ARB_20070313_000000
+ABUDHABI_ABUDHNEWS2_ARB_20070228_000000
+ALURDUNYA_URDUNYANEWS_ARB_20070312_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070308_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070316_000000
+ARABIYA_LATEHRNEWS_ARB_20070222_000000
+ARABIYA_PANORAMA_ARB_20070226_000000
+ARABIYA_PANORAMA_ARB_20070306_000000
+ARABIYA_PANORAMA_ARB_20070311_000000
+DUBAI_DUBAINEWS2_ARB_20070227_000000
+DUBAI_DUBAINEWS2_ARB_20070306_000000
+DUBAI_DUBAINEWS2_ARB_20070312_000000
+ALURDUNYA_URDUNYANEWS_ARB_20070326_000000
+ARABIYA_PANORAMA_ARB_20070326_000000
diff --git a/egs/gale_arabic/s5d/local/test/mt_eval_bc b/egs/gale_arabic/s5d/local/test/mt_eval_bc
new file mode 100644
index 00000000000..423a77c543b
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/mt_eval_bc
@@ -0,0 +1,5 @@
+ALAM_IRAQNOW_ARB_20070208_085800
+ALJZ_TODINTER_ARB_20070205_132800
+SYRIANTV_WEEKFILE_ARB_20070203_142800
+ALAM_WITHEVENT_ARB_20070206_205801
+ALAM_WITHEVENT_ARB_20070227_205800
diff --git a/egs/gale_arabic/s5d/local/test/mt_eval_bn b/egs/gale_arabic/s5d/local/test/mt_eval_bn
new file mode 100644
index 00000000000..02542707633
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/mt_eval_bn
@@ -0,0 +1,17 @@
+ARABIYA_ALARABIYANEWS2_ARB_20070312_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070322_000000
+ARABIYA_LATEHRNEWS_ARB_20070227_000000
+DUBAI_DUBAINEWS2_ARB_20070313_000000
+ABUDHABI_ABUDHNEWS2_ARB_20070228_000000
+ALURDUNYA_URDUNYANEWS_ARB_20070312_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070308_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070316_000000
+ARABIYA_LATEHRNEWS_ARB_20070222_000000
+ARABIYA_PANORAMA_ARB_20070226_000000
+ARABIYA_PANORAMA_ARB_20070306_000000
+ARABIYA_PANORAMA_ARB_20070311_000000
+DUBAI_DUBAINEWS2_ARB_20070227_000000
+DUBAI_DUBAINEWS2_ARB_20070306_000000
+DUBAI_DUBAINEWS2_ARB_20070312_000000
+ALURDUNYA_URDUNYANEWS_ARB_20070326_000000
+ARABIYA_PANORAMA_ARB_20070326_000000
diff --git a/egs/gale_arabic/s5d/local/test/test_p2 b/egs/gale_arabic/s5d/local/test/test_p2
new file mode 100644
index 00000000000..d82cf498804
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/test_p2
@@ -0,0 +1,11 @@
+ALAM_WITHEVENT_ARB_20070116_205800
+ALAM_WITHEVENT_ARB_20070130_205800
+ALAM_WITHEVENT_ARB_20070206_205801
+ALAM_WITHEVENT_ARB_20070213_205800
+ALAM_WITHEVENT_ARB_20070227_205800
+ALAM_WITHEVENT_ARB_20070306_205800
+ALAM_WITHEVENT_ARB_20070313_205800
+ARABIYA_FROMIRAQ_ARB_20070216_175800
+ARABIYA_FROMIRAQ_ARB_20070223_175801
+ARABIYA_FROMIRAQ_ARB_20070302_175801
+ARABIYA_FROMIRAQ_ARB_20070309_175800
diff --git a/egs/gale_arabic/s5d/path.sh b/egs/gale_arabic/s5d/path.sh
new file mode 100755
index 00000000000..be11b34cbc6
--- /dev/null
+++ b/egs/gale_arabic/s5d/path.sh
@@ -0,0 +1,5 @@
+export KALDI_ROOT=$(pwd)/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/gale_arabic/s5d/rnnlm b/egs/gale_arabic/s5d/rnnlm
new file mode 120000
index 00000000000..e136939ba72
--- /dev/null
+++ b/egs/gale_arabic/s5d/rnnlm
@@ -0,0 +1 @@
+../../../scripts/rnnlm/
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/run.sh b/egs/gale_arabic/s5d/run.sh
new file mode 100755
index 00000000000..f8fdafe0a77
--- /dev/null
+++ b/egs/gale_arabic/s5d/run.sh
@@ -0,0 +1,163 @@
+#!/bin/bash -e
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# 2019 Dongji Gao
+# Apache 2.0
+
+# This is the recipe for GALE Arabic speech translation project.
+# It is similar to gale_arabic/s5b but with more training data.
+
+num_jobs=60
+num_decode_jobs=60
+decode_gmm=true
+stage=0
+overwrite=true
+
+# GALE Arabic phase 2 Conversation Speech
+dir1=/export/corpora/LDC/LDC2013S02/ # checked
+dir2=/export/corpora/LDC/LDC2013S07/ # checked (16k)
+text1=/export/corpora/LDC/LDC2013T04/ # checked
+text2=/export/corpora/LDC/LDC2013T17/ # checked
+# GALE Arabic phase 2 News Speech
+dir3=/export/corpora/LDC/LDC2014S07/ # checked (16k)
+dir4=/export/corpora/LDC/LDC2015S01/ # checked (16k)
+text3=/export/corpora/LDC/LDC2014T17/ # checked
+text4=/export/corpora/LDC/LDC2015T01/ # checked
+# GALE Arabic phase 3 Conversation Speech
+dir5=/export/corpora/LDC/LDC2015S11/ # checked (16k)
+dir6=/export/corpora/LDC/LDC2016S01/ # checked (16k)
+text5=/export/corpora/LDC/LDC2015T16/ # checked
+text6=/export/corpora/LDC/LDC2016T06/ # checked
+# GALE Arabic phase 3 News Speech
+dir7=/export/corpora/LDC/LDC2016S07/ # checked (16k)
+dir8=/export/corpora/LDC/LDC2017S02/ # checked (16k)
+text7=/export/corpora/LDC/LDC2016T17/ # checked
+text8=/export/corpora/LDC/LDC2017T04/ # checked
+# GALE Arabic phase 4 Conversation Speech
+dir9=/export/corpora/LDC/LDC2017S15/ # checked (16k)
+text9=/export/corpora/LDC/LDC2017T12/ # checked
+# GALE Arabic phase 4 News Speech
+dir10=/export/corpora/LDC/LDC2018S05/ # checked (16k)
+text10=/export/corpora/LDC/LDC2018T14/ # checked
+
+# Training: 941h Testing: 10.4h
+
+galeData=GALE
+mgb2_dir=""
+giga_dir=""
+
+# preference on how to process xml file (use xml binary or python)
+process_xml=""
+
+run_rnnlm=false
+. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
+ ## This relates to the queue.
+. ./path.sh
+. ./utils/parse_options.sh # e.g. this parses the above options
+ # if supplied.
+
+if [ $stage -le 0 ]; then
+
+ if [ -f data/train/text ] && ! $overwrite; then
+ echo "$0: Not processing, probably script have run from wrong stage"
+ echo "Exiting with status 1 to avoid data corruption"
+ exit 1;
+ fi
+
+ echo "$0: Preparing data..."
+
+ options=""
+ [ ! -z $mgb2_dir ] && options="--process-xml python --mgb2-dir $mgb2_dir"
+ local/prepare_data.sh $options
+
+ echo "$0: Preparing lexicon and LM..."
+ local/prepare_dict.sh
+
+ utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang
+
+ local/gale_train_lms.sh data/train/text data/local/dict/lexicon.txt data/local/lm $giga_dir # giga is Arabic Gigawords
+
+ utils/format_lm.sh data/lang data/local/lm/gale_giga.o4g.kn.gz \
+ data/local/dict/lexicon.txt data/lang_test
+fi
+
+mfccdir=mfcc
+if [ $stage -le 1 ]; then
+ echo "$0: Preparing the test and train feature files..."
+ for x in dev test_p2 mt_all train; do
+ utils/fix_data_dir.sh data/$x
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj $num_jobs \
+ data/$x exp/make_mfcc/$x $mfccdir
+ utils/fix_data_dir.sh data/$x # some files fail to get mfcc for many reasons
+ steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
+ done
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating sub-set and training monophone system"
+ utils/subset_data_dir.sh data/train 10000 data/train.10K || exit 1;
+
+ steps/train_mono.sh --nj 40 --cmd "$train_cmd" \
+ data/train.10K data/lang exp/mono || exit 1;
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Aligning data using monophone system"
+ steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \
+ data/train data/lang exp/mono exp/mono_ali || exit 1;
+
+ echo "$0: training triphone system with delta features"
+ steps/train_deltas.sh --cmd "$train_cmd" \
+ 2500 30000 data/train data/lang exp/mono_ali exp/tri1 || exit 1;
+fi
+
+if [ $stage -le 4 ] && $decode_gmm; then
+ utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph
+ steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \
+ exp/tri1/graph data/dev exp/tri1/decode
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: Aligning data and retraining and realigning with lda_mllt"
+ steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \
+ data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
+
+ steps/train_lda_mllt.sh --cmd "$train_cmd" 4000 50000 \
+ data/train data/lang exp/tri1_ali exp/tri2b || exit 1;
+fi
+
+if [ $stage -le 6 ] && $decode_gmm; then
+ utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph
+ steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \
+ exp/tri2b/graph data/dev exp/tri2b/decode
+fi
+
+if [ $stage -le 7 ]; then
+ echo "$0: Aligning data and retraining and realigning with sat_basis"
+ steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \
+ data/train data/lang exp/tri2b exp/tri2b_ali || exit 1;
+
+ steps/train_sat_basis.sh --cmd "$train_cmd" \
+ 5000 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1;
+
+ steps/align_fmllr.sh --nj $num_jobs --cmd "$train_cmd" \
+ data/train data/lang exp/tri3b exp/tri3b_ali || exit 1;
+fi
+
+if [ $stage -le 8 ] && $decode_gmm; then
+ utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph
+ steps/decode_fmllr.sh --nj $num_decode_jobs --cmd \
+ "$decode_cmd" exp/tri3b/graph data/dev exp/tri3b/decode
+fi
+
+if [ $stage -le 9 ]; then
+ echo "$0: Training a regular chain model using the e2e alignments..."
+ local/chain/run_tdnn.sh
+fi
+
+if [ $stage -le 10 ] && $run_rnnlm; then
+ local/rnnlm/run_tdnn_lstm.sh
+fi
+
+echo "$0: training succedded"
+exit 0
diff --git a/egs/gale_arabic/s5d/steps b/egs/gale_arabic/s5d/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/gale_arabic/s5d/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/utils b/egs/gale_arabic/s5d/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/gale_arabic/s5d/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file
diff --git a/egs/gale_mandarin/s5/local/gale_data_prep_audio.sh b/egs/gale_mandarin/s5/local/gale_data_prep_audio.sh
index 21f325b9b84..0ea6cfcf9f9 100755
--- a/egs/gale_mandarin/s5/local/gale_data_prep_audio.sh
+++ b/egs/gale_mandarin/s5/local/gale_data_prep_audio.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Copyright 2016 Johns Hopkins Univeersity (author: Jan "Yenda" Trmal)
diff --git a/egs/gale_mandarin/s5/local/gale_data_prep_split.sh b/egs/gale_mandarin/s5/local/gale_data_prep_split.sh
index f7ca324355d..23774cd299b 100755
--- a/egs/gale_mandarin/s5/local/gale_data_prep_split.sh
+++ b/egs/gale_mandarin/s5/local/gale_data_prep_split.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 (author: Ahmed Ali, Hainan Xu)
# Copyright 2016 Johns Hopkins Univeersity (author: Jan "Yenda" Trmal)
diff --git a/egs/gale_mandarin/s5/local/gale_data_prep_txt.sh b/egs/gale_mandarin/s5/local/gale_data_prep_txt.sh
index d9b82902f0d..8404529e85a 100755
--- a/egs/gale_mandarin/s5/local/gale_data_prep_txt.sh
+++ b/egs/gale_mandarin/s5/local/gale_data_prep_txt.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 (author: Ahmed Ali, Hainan Xu)
# Copyright 2016 Johns Hopkins Univeersity (author: Jan "Yenda" Trmal)
diff --git a/egs/gale_mandarin/s5/local/gale_format_data.sh b/egs/gale_mandarin/s5/local/gale_format_data.sh
index fcd04e572e8..4ecac74d340 100755
--- a/egs/gale_mandarin/s5/local/gale_format_data.sh
+++ b/egs/gale_mandarin/s5/local/gale_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_mandarin/s5/local/gale_prep_dict.sh b/egs/gale_mandarin/s5/local/gale_prep_dict.sh
index c6a80240754..bc7c91b7fc7 100755
--- a/egs/gale_mandarin/s5/local/gale_prep_dict.sh
+++ b/egs/gale_mandarin/s5/local/gale_prep_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# prepare dictionary for HKUST
# it is done for English and Chinese separately,
# For English, we use CMU dictionary, and Sequitur G2P
@@ -41,7 +41,7 @@ cat $dict_dir/vocab-full.txt | grep -v '[a-zA-Z]' | \
if [ ! -f $dict_dir/cmudict/cmudict.0.7a ]; then
echo "--- Downloading CMU dictionary ..."
svn co http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/ $dict_dir/cmudict || \
- wget -e robots=off -r -np -nH --cut-dirs=4 -R index.html http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/ -P $dict_dir || exit 1
+ wget -c -e robots=off -r -np -nH --cut-dirs=4 -R index.html http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/ -P $dict_dir || exit 1
fi
if [ ! -f $dict_dir/cmudict/scripts/make_baseform.pl ] ; then
diff --git a/egs/gale_mandarin/s5/local/gale_train_lms.sh b/egs/gale_mandarin/s5/local/gale_train_lms.sh
index b70bf8de564..11573d06ffe 100755
--- a/egs/gale_mandarin/s5/local/gale_train_lms.sh
+++ b/egs/gale_mandarin/s5/local/gale_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/gale_mandarin/s5/local/nnet/run_dnn.sh b/egs/gale_mandarin/s5/local/nnet/run_dnn.sh
index 31159ae1754..79a20851a94 100755
--- a/egs/gale_mandarin/s5/local/nnet/run_dnn.sh
+++ b/egs/gale_mandarin/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 (author: Hainan Xu, Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_mandarin/s5/local/score_combine.sh b/egs/gale_mandarin/s5/local/score_combine.sh
index 65caab06ecc..c4d3c13886a 100755
--- a/egs/gale_mandarin/s5/local/score_combine.sh
+++ b/egs/gale_mandarin/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal
diff --git a/egs/gale_mandarin/s5/local/score_mbr.sh b/egs/gale_mandarin/s5/local/score_mbr.sh
index 04b84ccce5a..8c752368906 100755
--- a/egs/gale_mandarin/s5/local/score_mbr.sh
+++ b/egs/gale_mandarin/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/gale_mandarin/s5/local/split_wer_per_corpus.sh b/egs/gale_mandarin/s5/local/split_wer_per_corpus.sh
index baaf55e50a9..a438a142891 100755
--- a/egs/gale_mandarin/s5/local/split_wer_per_corpus.sh
+++ b/egs/gale_mandarin/s5/local/split_wer_per_corpus.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_mandarin/s5/run.sh b/egs/gale_mandarin/s5/run.sh
index fe9fdbdd483..7cb89ab6b65 100755
--- a/egs/gale_mandarin/s5/run.sh
+++ b/egs/gale_mandarin/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 (author: Hainan Xu, Ahmed Ali)
# Apache 2.0
diff --git a/egs/gop/README.md b/egs/gop/README.md
new file mode 100644
index 00000000000..d95f4e966fd
--- /dev/null
+++ b/egs/gop/README.md
@@ -0,0 +1,98 @@
+There is a copy of this document on Google Docs, which renders the equations better:
+[link](https://docs.google.com/document/d/1pie-PU6u2NZZC_FzocBGGm6mpfBJMiCft9UoG0uA1kA/edit?usp=sharing)
+
+* * *
+
+# GOP on Kaldi
+
+The Goodness of Pronunciation (GOP) is a variation of the posterior probability, for phone level pronunciation scoring.
+GOP is widely used in pronunciation evaluation and mispronunciation detection tasks.
+
+This implementation is mainly based on the following paper:
+
+Hu, W., Qian, Y., Soong, F. K., & Wang, Y. (2015). Improved mispronunciation detection with deep neural network trained acoustic models and transfer learning based logistic regression classifiers. Speech Communication, 67(January), 154-166.
+
+## GOP-GMM
+
+In the conventional GMM-HMM based system, GOP was first proposed in (Witt et al., 2000). It was defined as the duration normalised log of the posterior:
+
+$$
+GOP(p)=\frac{1}{t_e-t_s+1} \log p(p|\mathbf o)
+$$
+
+where $\mathbf o$ is the input observations, $p$ is the canonical phone, $t_s, t_e$ are the start and end frame indexes.
+
+Assuming $p(q_i)\approx p(q_j)$ for any $q_i, q_j$, we have:
+
+$$
+\log p(p|\mathbf o)=\frac{p(\mathbf o|p)p(p)}{\sum_{q\in Q} p(\mathbf o|q)p(q)}
+ \approx\frac{p(\mathbf o|p)}{\sum_{q\in Q} p(\mathbf o|q)}
+$$
+
+where $Q$ is the whole phone set.
+
+The numerator of the equation is calculated from the forced alignment result and the denominator is calculated from a Viterbi decoding with an unconstrained phone loop.
+
+We do not implement GOP-GMM for Kaldi, as GOP-NN performs much better than GOP-GMM.
+
+## GOP-NN
+
+The definition of GOP-NN is a bit different from the GOP-GMM. GOP-NN was defined as the log phone posterior ratio between the canonical phone and the one with the highest score (Hu et al., 2015).
+
+Firstly we define Log Phone Posterior (LPP):
+
+$$
+LPP(p)=\log p(p|\mathbf o; t_s,t_e)
+$$
+
+Then we define the GOP-NN using LPP:
+
+$$
+GOP(p)=\log \frac{LPP(p)}{\max_{q\in Q} LPP(q)}
+$$
+
+LPP could be calculated as:
+
+$$
+LPP(p) \approx \frac{1}{t_e-t_s+1} \sum_{t=t_s}^{t_e}\log p(p|o_t)
+$$
+
+$$
+p(p|o_t) = \sum_{s \in p} p(s|o_t)
+$$
+
+where $s$ is the senone label, $\{s|s \in p\}$ is the states belonging to those triphones whose current phone is $p$.
+
+## Phone-level Feature
+
+Normally the classifier-based approach achieves better performance than the GOP-based approach.
+
+Different from GOP based method, an extra supervised training process is needed. The input features for supervised training are phone-level, segmental features. The phone-level feature is defined as:
+
+$$
+{[LPP(p_1),\cdots,LPP(p_M), LPR(p_1|p_i), \cdots, LPR(p_j|p_i),\cdots]}^T
+$$
+
+where the Log Posterior Ratio (LPR) between phone $p_j$ and $p_i$ is defined as:
+
+$$
+LPR(p_j|p_i) = \log p(p_j|\mathbf o; t_s, t_e) - \log p(p_i|\mathbf o; t_s, t_e)
+$$
+
+## Implementation
+
+This implementation consists of an executable binary `bin/compute-gop` and some scripts.
+
+`compute-gop` computes GOP and extracts phone-level features using nnet output probabilities.
+The output probabilities are assumed to be from a log-softmax layer.
+
+The script `run.sh` shows a typical pipeline based on librispeech's model and data.
+
+In Hu's paper, GOP was computed using a feed-forward DNN.
+We have tried to use the output-xent of a chain model to compute GOP, but the result was not good.
+We suspect the HMM topology of the chain model may not be well suited to GOP.
+
+The nnet3's TDNN (no chain) model performs well in GOP computing, so this recipe uses it.
+
+## Acknowledgement
+The author of this recipe would like to thank Xingyu Na for his works of model tuning and his helpful suggestions.
diff --git a/egs/gop/s5/cmd.sh b/egs/gop/s5/cmd.sh
new file mode 100644
index 00000000000..9139633e57a
--- /dev/null
+++ b/egs/gop/s5/cmd.sh
@@ -0,0 +1,13 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export cmd="run.pl"
diff --git a/egs/gop/s5/local/make_testcase.sh b/egs/gop/s5/local/make_testcase.sh
new file mode 100755
index 00000000000..93ed03e3653
--- /dev/null
+++ b/egs/gop/s5/local/make_testcase.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+src=$1
+dst=$2
+
+# Select a very small set for testing
+utils/subset_data_dir.sh --shortest $src 10 $dst
+
+# make fake transcripts as negative examples
+cp $dst/text $dst/text.ori
+sed -i "s/ THERE / THOSE /" $dst/text
+sed -i "s/ IN / ON /" $dst/text
diff --git a/egs/gop/s5/local/remove_phone_markers.pl b/egs/gop/s5/local/remove_phone_markers.pl
new file mode 100755
index 00000000000..16236a749cf
--- /dev/null
+++ b/egs/gop/s5/local/remove_phone_markers.pl
@@ -0,0 +1,72 @@
+#!/usr/bin/env perl
+# Copyright 2019 Junbo Zhang
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+my $Usage = <new phone mapping file, in which each line is: "old-integer-id new-integer-id.
+
+Usage: utils/remove_phone_markers.pl
+ e.g.: utils/remove_phone_markers.pl phones.txt phones-pure.txt phone-to-pure-phone.int
+EOU
+
+if (@ARGV < 3) {
+ die $Usage;
+}
+
+my $old_phone_symbols_filename = shift @ARGV;
+my $new_phone_symbols_filename = shift @ARGV;
+my $mapping_filename = shift @ARGV;
+
+my %id_of_old_phone;
+open(IN, $old_phone_symbols_filename) or die "Can't open $old_phone_symbols_filename";
+while () {
+ chomp;
+ my ($phone, $id) = split;
+ next if $phone =~ /\#/;
+ $id_of_old_phone{$phone} = $id;
+}
+close IN;
+
+my $new_id = 0;
+my %id_of_new_phone;
+my %id_old_to_new;
+foreach (sort { $id_of_old_phone{$a} <=> $id_of_old_phone{$b} } keys %id_of_old_phone) {
+ my $old_phone = $_;
+ s/_[BIES]//;
+ s/\d//;
+ my $new_phone = $_;
+ $id_of_new_phone{$new_phone} = $new_id++ if not exists $id_of_new_phone{$new_phone};
+ $id_old_to_new{$id_of_old_phone{$old_phone}} = $id_of_new_phone{$new_phone};
+}
+
+# Write to file
+open(OUT, ">$new_phone_symbols_filename") or die "Can\'t write to $new_phone_symbols_filename";
+foreach (sort { $id_of_new_phone{$a} <=> $id_of_new_phone{$b} } keys %id_of_new_phone) {
+ print OUT "$_\t$id_of_new_phone{$_}\n";
+}
+close OUT;
+
+open(OUT, ">$mapping_filename") or die "Can\'t write to $mapping_filename";
+foreach (sort { $a <=> $b } keys %id_old_to_new) {
+ next if $_ == 0;
+ print OUT "$_ $id_old_to_new{$_}\n";
+}
+close OUT;
diff --git a/egs/gop/s5/path.sh b/egs/gop/s5/path.sh
new file mode 100755
index 00000000000..03df6dd9f2b
--- /dev/null
+++ b/egs/gop/s5/path.sh
@@ -0,0 +1,27 @@
+export KALDI_ROOT=`pwd`/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
+
+# we use this both in the (optional) LM training and the G2P-related scripts
+PYTHON='python2.7'
+
+### Below are the paths used by the optional parts of the recipe
+
+# We only need the Festival stuff below for the optional text normalization(for LM-training) step
+FEST_ROOT=tools/festival
+NSW_PATH=${FEST_ROOT}/festival/bin:${FEST_ROOT}/nsw/bin
+export PATH=$PATH:$NSW_PATH
+
+# SRILM is needed for LM model building
+SRILM_ROOT=$KALDI_ROOT/tools/srilm
+SRILM_PATH=$SRILM_ROOT/bin:$SRILM_ROOT/bin/i686-m64
+export PATH=$PATH:$SRILM_PATH
+
+# Sequitur G2P executable
+sequitur=$KALDI_ROOT/tools/sequitur/g2p.py
+sequitur_path="$(dirname $sequitur)/lib/$PYTHON/site-packages"
+
+# Directory under which the LM training corpus should be extracted
+LM_CORPUS_ROOT=./lm-corpus
diff --git a/egs/gop/s5/run.sh b/egs/gop/s5/run.sh
new file mode 100755
index 00000000000..f9ae0d45672
--- /dev/null
+++ b/egs/gop/s5/run.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Junbo Zhang
+# Apache 2.0
+
+# This script shows how to calculate Goodness of Pronunciation (GOP) and
+# extract phone-level pronunciation feature for mispronunciations detection
+# tasks. Read ../README.md or the following paper for details:
+#
+# "Hu et al., Improved mispronunciation detection with deep neural network
+# trained acoustic models and transfer learning based logistic regression
+# classifiers, 2015."
+
+# You might not want to do this for interactive shells.
+set -e
+
+# Before running this recipe, you have to run the librispeech recipe firstly.
+# This script assumes the following paths exist.
+librispeech_eg=../../librispeech/s5
+model=$librispeech_eg/exp/nnet3_cleaned/tdnn_sp
+ivector=$librispeech_eg/exp/nnet3_cleaned/ivectors_test_clean_hires
+lang=$librispeech_eg/data/lang
+test_data=$librispeech_eg/data/test_clean_hires
+
+for d in $model $ivector $lang $test_data; do
+ [ ! -d $d ] && echo "$0: no such path $d" && exit 1;
+done
+
+# Global configurations
+stage=0
+nj=4
+
+data=test_10short
+dir=exp/gop_$data
+
+. ./cmd.sh
+. ./path.sh
+. parse_options.sh
+
+if [ $stage -le 0 ]; then
+ # Prepare test data
+ [ -d data ] || mkdir -p data/$data
+ local/make_testcase.sh $test_data data/$data
+fi
+
+if [ $stage -le 1 ]; then
+ # Compute Log-likelihoods
+ steps/nnet3/compute_output.sh --cmd "$cmd" --nj $nj \
+ --online-ivector-dir $ivector data/$data $model exp/probs_$data
+fi
+
+if [ $stage -le 2 ]; then
+ steps/nnet3/align.sh --cmd "$cmd" --nj $nj --use_gpu false \
+ --online_ivector_dir $ivector data/$data $lang $model $dir
+fi
+
+if [ $stage -le 3 ]; then
+ # make a map which converts phones to "pure-phones"
+ # "pure-phone" means the phone whose stress and pos-in-word markers are ignored
+ # eg. AE1_B --> AE, EH2_S --> EH, SIL --> SIL
+ local/remove_phone_markers.pl $lang/phones.txt $dir/phones-pure.txt \
+ $dir/phone-to-pure-phone.int
+
+ # Convert transition-id to pure-phone id
+ $cmd JOB=1:$nj $dir/log/ali_to_phones.JOB.log \
+ ali-to-phones --per-frame=true $model/final.mdl "ark,t:gunzip -c $dir/ali.JOB.gz|" \
+ "ark,t:-" \| utils/apply_map.pl -f 2- $dir/phone-to-pure-phone.int \| \
+ gzip -c \>$dir/ali-pure-phone.JOB.gz || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+ # The outputs of the binary compute-gop are the GOPs and the phone-level features.
+ #
+ # An example of the GOP result (extracted from "ark,t:$dir/gop.3.txt"):
+ # 4446-2273-0031 [ 1 0 ] [ 12 0 ] [ 27 -5.382001 ] [ 40 -13.91807 ] [ 1 -0.2555897 ] \
+ # [ 21 -0.2897284 ] [ 5 0 ] [ 31 0 ] [ 33 0 ] [ 3 -11.43557 ] [ 25 0 ] \
+ # [ 16 0 ] [ 30 -0.03224623 ] [ 5 0 ] [ 25 0 ] [ 33 0 ] [ 1 0 ]
+ # It is in the posterior format, where each pair stands for [pure-phone-index gop-value].
+ # For example, [ 27 -5.382001 ] means the GOP of the pure-phone 27 (it corresponds to the
+ # phone "OW", according to "$dir/phones-pure.txt") is -5.382001, indicating the audio
+ # segment of this phone should be a mispronunciation.
+ #
+ # The phone-level features are in matrix format:
+ # 4446-2273-0031 [ -0.2462088 -10.20292 -11.35369 ...
+ # -8.584108 -7.629755 -13.04877 ...
+ # ...
+ # ... ]
+ # The number of rows equals the number of phones in the utterance. In this case, it is 17.
+ # The column number is 2 * (pure-phone set size), as the feature consists of LLR + LPR.
+ # The phone-level features can be used to train a classifier with human labels. See Hu's
+ # paper for detail.
+ $cmd JOB=1:$nj $dir/log/compute_gop.JOB.log \
+ compute-gop --phone-map=$dir/phone-to-pure-phone.int $model/final.mdl \
+ "ark,t:gunzip -c $dir/ali-pure-phone.JOB.gz|" \
+ "ark:exp/probs_$data/output.JOB.ark" \
+ "ark,t:$dir/gop.JOB.txt" "ark,t:$dir/phonefeat.JOB.txt" || exit 1;
+ echo "Done compute-gop, the results: \"$dir/gop..txt\" in posterior format."
+
+ # We set -5 as a universal empirical threshold here. You can also determine multiple phone
+ # dependent thresholds based on the human-labeled mispronunciation data.
+ echo "The phones whose gop values less than -5 could be treated as mispronunciations."
+fi
diff --git a/egs/gop/s5/steps b/egs/gop/s5/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/gop/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/gop/s5/utils b/egs/gop/s5/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/gop/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/gp/s1/install.sh b/egs/gp/s1/install.sh
index 0222b098664..ab486ee1f97 100755
--- a/egs/gp/s1/install.sh
+++ b/egs/gp/s1/install.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
diff --git a/egs/gp/s1/local/gp_train_multi_ubm.sh b/egs/gp/s1/local/gp_train_multi_ubm.sh
index 9afc78ae7c0..f6d4abc19c8 100755
--- a/egs/gp/s1/local/gp_train_multi_ubm.sh
+++ b/egs/gp/s1/local/gp_train_multi_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/align_deltas.sh b/egs/gp/s1/steps/align_deltas.sh
index 22da04432c7..a9d25853fdf 100755
--- a/egs/gp/s1/steps/align_deltas.sh
+++ b/egs/gp/s1/steps/align_deltas.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation; Arnab Ghoshal
diff --git a/egs/gp/s1/steps/decode_deltas.sh b/egs/gp/s1/steps/decode_deltas.sh
index 98f9c8ed337..58cf9ae3873 100755
--- a/egs/gp/s1/steps/decode_deltas.sh
+++ b/egs/gp/s1/steps/decode_deltas.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/train_deltas.sh b/egs/gp/s1/steps/train_deltas.sh
index 0efe7b60379..975faa93bc6 100755
--- a/egs/gp/s1/steps/train_deltas.sh
+++ b/egs/gp/s1/steps/train_deltas.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/train_mono.sh b/egs/gp/s1/steps/train_mono.sh
index e82c14fcaf2..10970a5714b 100755
--- a/egs/gp/s1/steps/train_mono.sh
+++ b/egs/gp/s1/steps/train_mono.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/train_trees.sh b/egs/gp/s1/steps/train_trees.sh
index 9a3a51c8654..52511a0a5c6 100755
--- a/egs/gp/s1/steps/train_trees.sh
+++ b/egs/gp/s1/steps/train_trees.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/train_ubm_deltas.sh b/egs/gp/s1/steps/train_ubm_deltas.sh
index 7a666ca7668..73617e2a1d6 100755
--- a/egs/gp/s1/steps/train_ubm_deltas.sh
+++ b/egs/gp/s1/steps/train_ubm_deltas.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/utils/lmrescore.sh b/egs/gp/s1/utils/lmrescore.sh
index c911d0ce8b0..9e706395c4f 100755
--- a/egs/gp/s1/utils/lmrescore.sh
+++ b/egs/gp/s1/utils/lmrescore.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation; Arnab Ghoshal
diff --git a/egs/gp/s1/utils/mkgraph.sh b/egs/gp/s1/utils/mkgraph.sh
index 3aba742832d..c9225a63b81 100755
--- a/egs/gp/s1/utils/mkgraph.sh
+++ b/egs/gp/s1/utils/mkgraph.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/score_lats.sh b/egs/gp/s1/utils/score_lats.sh
index 5aed89b5ef4..fadb3d635f5 100755
--- a/egs/gp/s1/utils/score_lats.sh
+++ b/egs/gp/s1/utils/score_lats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/score_sclite.sh b/egs/gp/s1/utils/score_sclite.sh
index 9e7426e84b7..49e2398095b 100755
--- a/egs/gp/s1/utils/score_sclite.sh
+++ b/egs/gp/s1/utils/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/score_text.sh b/egs/gp/s1/utils/score_text.sh
index cf485261bca..b71e0bc030e 100755
--- a/egs/gp/s1/utils/score_text.sh
+++ b/egs/gp/s1/utils/score_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
if [ -f ./path.sh ]; then . ./path.sh; fi
diff --git a/egs/gp/s1/utils/split_data.sh b/egs/gp/s1/utils/split_data.sh
index 19431aa5c6d..e8f5f048edb 100755
--- a/egs/gp/s1/utils/split_data.sh
+++ b/egs/gp/s1/utils/split_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s5/local/gp_install.sh b/egs/gp/s5/local/gp_install.sh
index 85d16bc5c21..6f65704e5f4 100755
--- a/egs/gp/s5/local/gp_install.sh
+++ b/egs/gp/s5/local/gp_install.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
diff --git a/egs/gp/s5/local/score.sh b/egs/gp/s5/local/score.sh
index d74868282f1..802be09ba9c 100755
--- a/egs/gp/s5/local/score.sh
+++ b/egs/gp/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/heroico/s5/local/chain/compare_wer.sh b/egs/heroico/s5/local/chain/compare_wer.sh
index 3ee755dee36..157e618927b 100755
--- a/egs/heroico/s5/local/chain/compare_wer.sh
+++ b/egs/heroico/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
index 361879b4142..715e7f398b7 100755
--- a/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
+++ b/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_cnn_tdnn_1a.sh is modified from run_tdnn_1b.sh but taking
# the xconfig from mini-librispeech's run_cnn_tdnn_1a54.sh; only
diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh
index 290bd4c7970..42391b37e5f 100755
--- a/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# local/chain/compare_wer.sh exp/chain/tdnn1a_sp
# ./local/chain/compare_wer.sh exp/chain/tdnn1a_sp
diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh
index cfb4dc1f697..b8f397baff8 100755
--- a/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1b is as 1a but a re-tuned model with quite a few changes, including moving to
# a resnet-style factored TDNN-F model.
diff --git a/egs/heroico/s5/local/get_wav_list.sh b/egs/heroico/s5/local/get_wav_list.sh
index 2d79079935b..419b449f2ab 100755
--- a/egs/heroico/s5/local/get_wav_list.sh
+++ b/egs/heroico/s5/local/get_wav_list.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 John Morgan
# Apache 2.0.
diff --git a/egs/heroico/s5/local/nnet3/run_ivector_common.sh b/egs/heroico/s5/local/nnet3/run_ivector_common.sh
index e882ce0c918..4d2014cd4fb 100755
--- a/egs/heroico/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/heroico/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/heroico/s5/local/prepare_data.sh b/egs/heroico/s5/local/prepare_data.sh
index b78d9f1d1cb..d5fcd782766 100755
--- a/egs/heroico/s5/local/prepare_data.sh
+++ b/egs/heroico/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 John Morgan
# Apache 2.0.
diff --git a/egs/heroico/s5/local/prepare_lm.sh b/egs/heroico/s5/local/prepare_lm.sh
index e2a92ba3c5a..7072b9b7088 100755
--- a/egs/heroico/s5/local/prepare_lm.sh
+++ b/egs/heroico/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 John Morgan
# Apache 2.0.
diff --git a/egs/heroico/s5/local/subs_download.sh b/egs/heroico/s5/local/subs_download.sh
index 98dcb42d4e0..ab236a8ecb5 100755
--- a/egs/heroico/s5/local/subs_download.sh
+++ b/egs/heroico/s5/local/subs_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 John Morgan
# Apache 2.0.
diff --git a/egs/heroico/s5/run.sh b/egs/heroico/s5/run.sh
index 4cc5617e985..c990468a9db 100755
--- a/egs/heroico/s5/run.sh
+++ b/egs/heroico/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/hkust/s5/local/chain/compare_wer.sh b/egs/hkust/s5/local/chain/compare_wer.sh
index 27a6b783433..364d40d4ec4 100755
--- a/egs/hkust/s5/local/chain/compare_wer.sh
+++ b/egs/hkust/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen Liu)
# compare wer between diff. models in hkust chain directory
diff --git a/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh
index c62b776de2b..85e2323688e 100755
--- a/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh
+++ b/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7p.sh in swbd chain recipe.
diff --git a/egs/hkust/s5/local/ext/score.sh b/egs/hkust/s5/local/ext/score.sh
index e4009abfe94..f6f5d5af61d 100755
--- a/egs/hkust/s5/local/ext/score.sh
+++ b/egs/hkust/s5/local/ext/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012-2013.
# Hong Kong University of Science and Technology (Ricky Chan) 2013. Apache 2.0.
diff --git a/egs/hkust/s5/local/ext/score_basic_ext.sh b/egs/hkust/s5/local/ext/score_basic_ext.sh
index c54c732edf1..1378cbd16c5 100755
--- a/egs/hkust/s5/local/ext/score_basic_ext.sh
+++ b/egs/hkust/s5/local/ext/score_basic_ext.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012-2013.
# Hong Kong University of Science and Technology (Author: Ricky Chan Ho Yin). Apache 2.0.
#
diff --git a/egs/hkust/s5/local/hkust_data_prep.sh b/egs/hkust/s5/local/hkust_data_prep.sh
index 6342ccfe861..7857f2c6722 100755
--- a/egs/hkust/s5/local/hkust_data_prep.sh
+++ b/egs/hkust/s5/local/hkust_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh || exit 1;
diff --git a/egs/hkust/s5/local/hkust_format_data.sh b/egs/hkust/s5/local/hkust_format_data.sh
index 7fc9b701f49..7396485e4bb 100755
--- a/egs/hkust/s5/local/hkust_format_data.sh
+++ b/egs/hkust/s5/local/hkust_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f ./path.sh ]; then . ./path.sh; fi
diff --git a/egs/hkust/s5/local/hkust_prepare_dict.sh b/egs/hkust/s5/local/hkust_prepare_dict.sh
index 49f27f2f868..0f3f26efa53 100755
--- a/egs/hkust/s5/local/hkust_prepare_dict.sh
+++ b/egs/hkust/s5/local/hkust_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 LeSpeech (Author: Xingyu Na)
# prepare dictionary for HKUST
diff --git a/egs/hkust/s5/local/hkust_train_lms.sh b/egs/hkust/s5/local/hkust_train_lms.sh
index 8520bb26d2d..7d83ffd00fc 100755
--- a/egs/hkust/s5/local/hkust_train_lms.sh
+++ b/egs/hkust/s5/local/hkust_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/hkust/s5/local/nnet/run_cnn.sh b/egs/hkust/s5/local/nnet/run_cnn.sh
index e0b7e10df86..1d92bcb0cb5 100755
--- a/egs/hkust/s5/local/nnet/run_cnn.sh
+++ b/egs/hkust/s5/local/nnet/run_cnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
diff --git a/egs/hkust/s5/local/nnet/run_dnn.sh b/egs/hkust/s5/local/nnet/run_dnn.sh
index e7e229b1d2b..4cfa53a9cc7 100755
--- a/egs/hkust/s5/local/nnet/run_dnn.sh
+++ b/egs/hkust/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# 2014 Guoguo Chen
diff --git a/egs/hkust/s5/local/nnet/run_lstm.sh b/egs/hkust/s5/local/nnet/run_lstm.sh
index ec5d0e3a856..6a15f87c1d1 100755
--- a/egs/hkust/s5/local/nnet/run_lstm.sh
+++ b/egs/hkust/s5/local/nnet/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/hkust/s5/local/nnet2/run_5d.sh b/egs/hkust/s5/local/nnet2/run_5d.sh
index b97f5c8af99..d73c04e6112 100755
--- a/egs/hkust/s5/local/nnet2/run_5d.sh
+++ b/egs/hkust/s5/local/nnet2/run_5d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This runs on the full training set (with duplicates removed), with p-norm
diff --git a/egs/hkust/s5/local/nnet2/run_convnet.sh b/egs/hkust/s5/local/nnet2/run_convnet.sh
index 56b81c42a11..f21727643b8 100755
--- a/egs/hkust/s5/local/nnet2/run_convnet.sh
+++ b/egs/hkust/s5/local/nnet2/run_convnet.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 2015 Xingyu Na
# This script runs on the full training set, using ConvNet setup on top of
diff --git a/egs/hkust/s5/local/nnet3/compare_wer.sh b/egs/hkust/s5/local/nnet3/compare_wer.sh
index 252fab12e18..8216ed6b00e 100755
--- a/egs/hkust/s5/local/nnet3/compare_wer.sh
+++ b/egs/hkust/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen Liu)
# compare wer between diff. models in hkust nnet3 directory
diff --git a/egs/hkust/s5/local/nnet3/run_ivector_common.sh b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
index de952e08904..e7c79cdf3bf 100755
--- a/egs/hkust/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is modified based on swbd/s5c/local/nnet3/run_ivector_common.sh
diff --git a/egs/hkust/s5/local/nnet3/run_lstm.sh b/egs/hkust/s5/local/nnet3/run_lstm.sh
index 2a470868298..279302759d9 100755
--- a/egs/hkust/s5/local/nnet3/run_lstm.sh
+++ b/egs/hkust/s5/local/nnet3/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is a basic lstm script
diff --git a/egs/hkust/s5/local/nnet3/tuning/run_tdnn_2a.sh b/egs/hkust/s5/local/nnet3/tuning/run_tdnn_2a.sh
index 1cdbbf3bb2c..7339ce595c6 100755
--- a/egs/hkust/s5/local/nnet3/tuning/run_tdnn_2a.sh
+++ b/egs/hkust/s5/local/nnet3/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7h.sh in swbd chain recipe.
# exp 2a: change the step of making configs, using xconfig with
diff --git a/egs/hkust/s5/local/online/run_nnet2_common.sh b/egs/hkust/s5/local/online/run_nnet2_common.sh
index 185bca38d8f..737694e625a 100755
--- a/egs/hkust/s5/local/online/run_nnet2_common.sh
+++ b/egs/hkust/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
set -e
diff --git a/egs/hkust/s5/local/online/run_nnet2_ms.sh b/egs/hkust/s5/local/online/run_nnet2_ms.sh
index c3177e1136e..4c3f01de3fc 100755
--- a/egs/hkust/s5/local/online/run_nnet2_ms.sh
+++ b/egs/hkust/s5/local/online/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
set -e
diff --git a/egs/hkust/s5/local/run_discriminative.sh b/egs/hkust/s5/local/run_discriminative.sh
index f7c399c65dd..6d62c493cd0 100755
--- a/egs/hkust/s5/local/run_discriminative.sh
+++ b/egs/hkust/s5/local/run_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Alibaba Robotics Corp. (Author: Xingyu Na)
# Apache 2.0
diff --git a/egs/hkust/s5/local/run_sgmm.sh b/egs/hkust/s5/local/run_sgmm.sh
index 74c4811f0a3..58ce1b6dafa 100755
--- a/egs/hkust/s5/local/run_sgmm.sh
+++ b/egs/hkust/s5/local/run_sgmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Alibaba Robotics Corp. (Author: Xingyu Na)
# Apache2.0
diff --git a/egs/hkust/s5/local/score.sh b/egs/hkust/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/hkust/s5/local/score.sh
+++ b/egs/hkust/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/hkust/s5/run.sh b/egs/hkust/s5/run.sh
index 9b684d5a215..9347598464c 100755
--- a/egs/hkust/s5/run.sh
+++ b/egs/hkust/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Chao Weng
# 2016 Alibaba Robotics Corp. (Author: Xingyu Na)
diff --git a/egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh b/egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh
index ea4e5699ce3..da355ddfc19 100755
--- a/egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh
+++ b/egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# 2017 Vimal Manohar
# License: Apache 2.0
diff --git a/egs/hub4_english/s5/local/data_prep/prepare_1997_bn_data.sh b/egs/hub4_english/s5/local/data_prep/prepare_1997_bn_data.sh
index 5f049f7831c..865d6eb54d4 100755
--- a/egs/hub4_english/s5/local/data_prep/prepare_1997_bn_data.sh
+++ b/egs/hub4_english/s5/local/data_prep/prepare_1997_bn_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# 2017 Vimal Manohar
# License: Apache 2.0
diff --git a/egs/hub4_english/s5/local/format_lms.sh b/egs/hub4_english/s5/local/format_lms.sh
index 1d18209aa60..7afdf51757f 100755
--- a/egs/hub4_english/s5/local/format_lms.sh
+++ b/egs/hub4_english/s5/local/format_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Nickolay V. Shmyrev
# Apache 2.0
diff --git a/egs/hub4_english/s5/local/prepare_dict.sh b/egs/hub4_english/s5/local/prepare_dict.sh
index 3f53ec6af74..acbbd6de364 100755
--- a/egs/hub4_english/s5/local/prepare_dict.sh
+++ b/egs/hub4_english/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation
# 2012-2014 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/hub4_english/s5/local/run_cleanup_segmentation.sh b/egs/hub4_english/s5/local/run_cleanup_segmentation.sh
index e91ec318650..3d3558d032c 100755
--- a/egs/hub4_english/s5/local/run_cleanup_segmentation.sh
+++ b/egs/hub4_english/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/hub4_english/s5/local/run_segmentation_wsj.sh b/egs/hub4_english/s5/local/run_segmentation_wsj.sh
index a321abe9a29..89598e7916d 100755
--- a/egs/hub4_english/s5/local/run_segmentation_wsj.sh
+++ b/egs/hub4_english/s5/local/run_segmentation_wsj.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-18 Vimal Manohar
# Apache 2.0
diff --git a/egs/hub4_english/s5/local/score_sclite.sh b/egs/hub4_english/s5/local/score_sclite.sh
index add014c2dcc..f8ec12e8339 100755
--- a/egs/hub4_english/s5/local/score_sclite.sh
+++ b/egs/hub4_english/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/hub4_english/s5/local/train_lm.sh b/egs/hub4_english/s5/local/train_lm.sh
index 4378a287d42..04f979af135 100755
--- a/egs/hub4_english/s5/local/train_lm.sh
+++ b/egs/hub4_english/s5/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# Apache 2.0
diff --git a/egs/hub4_english/s5/run.sh b/egs/hub4_english/s5/run.sh
index 5db61d4eb10..f9e6cf4ab2c 100755
--- a/egs/hub4_english/s5/run.sh
+++ b/egs/hub4_english/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# Apache 2.0.
diff --git a/egs/hub4_spanish/s5/local/chain/compare_wer.sh b/egs/hub4_spanish/s5/local/chain/compare_wer.sh
index 0194b86ac69..72bb3ab1dc9 100755
--- a/egs/hub4_spanish/s5/local/chain/compare_wer.sh
+++ b/egs/hub4_spanish/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
index d1b657a2d74..b18d8ec0aaa 100755
--- a/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
+++ b/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
## This is taken from mini_librispeech.
diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh
index 40bbbe1ae79..0c13f46ffa6 100755
--- a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
## This is taken from mini_librispeech, but the proportional-shrink value was
# tuned for this corpus (hub4-spanish)
diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh
index a498d8157f3..159f3134fb4 100755
--- a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
## This is taken from mini_librispeech
diff --git a/egs/hub4_spanish/s5/local/nnet3/run_ivector_common.sh b/egs/hub4_spanish/s5/local/nnet3/run_ivector_common.sh
index 623e0e7e02f..14387b7a30c 100755
--- a/egs/hub4_spanish/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/hub4_spanish/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/hub4_spanish/s5/local/prepare_data.sh b/egs/hub4_spanish/s5/local/prepare_data.sh
index cadefe560a8..6a8e8a60213 100755
--- a/egs/hub4_spanish/s5/local/prepare_data.sh
+++ b/egs/hub4_spanish/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/hub4_spanish/s5/local/prepare_lexicon.sh b/egs/hub4_spanish/s5/local/prepare_lexicon.sh
index 1460e60e558..c54bdbc92a3 100755
--- a/egs/hub4_spanish/s5/local/prepare_lexicon.sh
+++ b/egs/hub4_spanish/s5/local/prepare_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/hub4_spanish/s5/local/reestimate_langp.sh b/egs/hub4_spanish/s5/local/reestimate_langp.sh
index 62fc4c24a16..3380152aa3b 100755
--- a/egs/hub4_spanish/s5/local/reestimate_langp.sh
+++ b/egs/hub4_spanish/s5/local/reestimate_langp.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1a.sh b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1a.sh
index 3055fe95928..255872e1b33 100755
--- a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1a.sh
+++ b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1b.sh b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1b.sh
index 5835de9d373..4577f966574 100755
--- a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1b.sh
+++ b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_tdnn.sh b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_tdnn.sh
index 5dc840d9b03..6d0fbdead34 100755
--- a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_tdnn.sh
+++ b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/hub4_spanish/s5/local/run_sgmm2.sh b/egs/hub4_spanish/s5/local/run_sgmm2.sh
index eb23eddea2d..c0a515c26e3 100755
--- a/egs/hub4_spanish/s5/local/run_sgmm2.sh
+++ b/egs/hub4_spanish/s5/local/run_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/hub4_spanish/s5/local/train_lms_srilm.sh b/egs/hub4_spanish/s5/local/train_lms_srilm.sh
index 12ce6d79490..29d60eecd60 100755
--- a/egs/hub4_spanish/s5/local/train_lms_srilm.sh
+++ b/egs/hub4_spanish/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal )
# Apache 2.0
diff --git a/egs/hub4_spanish/s5/run.sh b/egs/hub4_spanish/s5/run.sh
index a0420c99839..6857976567f 100755
--- a/egs/hub4_spanish/s5/run.sh
+++ b/egs/hub4_spanish/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/iam/v1/local/augment_data.sh b/egs/iam/v1/local/augment_data.sh
index 31e4a8217ca..e5179a77c72 100755
--- a/egs/iam/v1/local/augment_data.sh
+++ b/egs/iam/v1/local/augment_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/iam/v1/local/chain/compare_wer.sh b/egs/iam/v1/local/chain/compare_wer.sh
index 4a2cc29481c..ae575b29d4f 100755
--- a/egs/iam/v1/local/chain/compare_wer.sh
+++ b/egs/iam/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh
index ef1273f3961..1c766e270ad 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2017 Chun Chieh Chang
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh
index bbcc55aa2b0..7b334fdbdbc 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1a is as 1a except it uses chain alignments (using 1a system) instead of gmm alignments
# local/chain/compare_wer.sh exp/chain/cnn_chainali_1a
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh
index 401ffa14e19..840803f0249 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1b is as chainali_1a except it has 3 more cnn layers and 1 less tdnn layer.
# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh
index 17209b9204f..9423612f029 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1c is as chainali_1b except it uses l2-regularize
# local/chain/compare_wer.sh exp/chain/cnn_chainali_1c
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh
index 89a40ed2a13..a26882e99da 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1d is as chainali_1c except it uses unconstrained egs
# local/chain/compare_wer.sh exp/chain/cnn_chainali_1d
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
index 703d404159a..76493fcc0f1 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a
# System cnn_e2eali_1a_(dict_50k) cnn_e2eali_1a_(dict_50k + unk model)
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
index 905c4661477..af3ff312cc0 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1b is the same as e2eali_1a but uses unconstrained egs
# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1b
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh
index 26b1aca0929..fa884390c26 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1c is the same as e2eali_1b but has more CNN layers, different filter size
# smaller lm-opts, minibatch, frams-per-iter, less epochs and more initial/finaljobs.
diff --git a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index 462ad0522de..35528afcad0 100755
--- a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/iam/v1/local/extract_features.sh b/egs/iam/v1/local/extract_features.sh
index 1741ad3f9b2..1479f21d127 100755
--- a/egs/iam/v1/local/extract_features.sh
+++ b/egs/iam/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/iam/v1/local/prepare_data.sh b/egs/iam/v1/local/prepare_data.sh
index dc07f07e318..59e619f196f 100755
--- a/egs/iam/v1/local/prepare_data.sh
+++ b/egs/iam/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/iam/v1/local/score.sh b/egs/iam/v1/local/score.sh
index d964d70206b..2e27f23ce29 100755
--- a/egs/iam/v1/local/score.sh
+++ b/egs/iam/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
diff --git a/egs/iam/v1/local/train_lm.sh b/egs/iam/v1/local/train_lm.sh
index 3e8c838efdb..57afa897743 100755
--- a/egs/iam/v1/local/train_lm.sh
+++ b/egs/iam/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/iam/v1/run.sh b/egs/iam/v1/run.sh
index 85811b6cb3d..9362c353346 100755
--- a/egs/iam/v1/run.sh
+++ b/egs/iam/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/iam/v1/run_end2end.sh b/egs/iam/v1/run_end2end.sh
index 0a8b014715f..2f85a9940a9 100755
--- a/egs/iam/v1/run_end2end.sh
+++ b/egs/iam/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
set -e
diff --git a/egs/iam/v2/local/augment_data.sh b/egs/iam/v2/local/augment_data.sh
index 31e4a8217ca..e5179a77c72 100755
--- a/egs/iam/v2/local/augment_data.sh
+++ b/egs/iam/v2/local/augment_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/iam/v2/local/chain/compare_wer.sh b/egs/iam/v2/local/chain/compare_wer.sh
index 2ce14e13694..a1b8fffe166 100755
--- a/egs/iam/v2/local/chain/compare_wer.sh
+++ b/egs/iam/v2/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh
index 9a01688ba35..07188c7186e 100755
--- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1a is the same as chainali_1c but uses the e2e chain model to get the
# lattice alignments and to build a tree
diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh
index 28aa246f334..3db893e7a65 100755
--- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh
+++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1b is the same as e2eali_1a but uses unconstrained egs
diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh
index f158317950a..1ceec555b7a 100755
--- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh
+++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1c is the same as e2eali_1b but has fewer CNN layers, smaller
# l2-regularize, more epochs and uses dropout.
diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh
index 1c44057454a..9575b55c3c6 100755
--- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh
+++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1d is the same as e2eali_1c but has more CNN layers, different filter size
# smaller lm-opts, minibatch, frams-per-iter, less epochs and more initial/finaljobs.
diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh
index cb2bfa0a82d..cf7f3051654 100755
--- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh
index d5f79602695..72703583954 100755
--- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh
+++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/iam/v2/local/extract_features.sh b/egs/iam/v2/local/extract_features.sh
index 1741ad3f9b2..1479f21d127 100755
--- a/egs/iam/v2/local/extract_features.sh
+++ b/egs/iam/v2/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/iam/v2/local/prepare_data.sh b/egs/iam/v2/local/prepare_data.sh
index cf729d9a939..97ceb618b1a 100755
--- a/egs/iam/v2/local/prepare_data.sh
+++ b/egs/iam/v2/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/iam/v2/local/score.sh b/egs/iam/v2/local/score.sh
index 1d84815fc69..6168f38a929 100755
--- a/egs/iam/v2/local/score.sh
+++ b/egs/iam/v2/local/score.sh
@@ -1,5 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/iam/v2/local/train_lm.sh b/egs/iam/v2/local/train_lm.sh
index cc0119eb748..a63ec51d874 100755
--- a/egs/iam/v2/local/train_lm.sh
+++ b/egs/iam/v2/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh
index c515c85fc72..1b0538f2560 100755
--- a/egs/iam/v2/run_end2end.sh
+++ b/egs/iam/v2/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
set -e
diff --git a/egs/iban/s5/local/arpa2G.sh b/egs/iban/s5/local/arpa2G.sh
index dddd7eb9097..97a139db0ec 100755
--- a/egs/iban/s5/local/arpa2G.sh
+++ b/egs/iban/s5/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh
index 10650a18269..23bacb2cc23 100755
--- a/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh
index db62e6f8a55..338fc090e07 100755
--- a/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/iban/s5/local/nnet3/run_ivector_common.sh b/egs/iban/s5/local/nnet3/run_ivector_common.sh
index b909ed04cde..ddec4419a61 100755
--- a/egs/iban/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/iban/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/iban/s5/local/nnet3/tuning/run_tdnn_1a.sh b/egs/iban/s5/local/nnet3/tuning/run_tdnn_1a.sh
index f53c5443ba8..88ff628e3df 100755
--- a/egs/iban/s5/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/iban/s5/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# cat exp/nnet3/tdnn_1a/decode_dev/scoring_kaldi/best_wer
# %WER 17.34 [ 1908 / 11006, 257 ins, 303 del, 1348 sub ] exp/nnet3/tdnn_1a/decode_dev/wer_12_0.0
diff --git a/egs/iban/s5/local/prepare_data.sh b/egs/iban/s5/local/prepare_data.sh
index 8afffb56898..cb1077054d3 100755
--- a/egs/iban/s5/local/prepare_data.sh
+++ b/egs/iban/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/iban/s5/local/prepare_dict.sh b/egs/iban/s5/local/prepare_dict.sh
index ebec12bc171..6f87e1c1fe4 100755
--- a/egs/iban/s5/local/prepare_dict.sh
+++ b/egs/iban/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/iban/s5/local/prepare_lm.sh b/egs/iban/s5/local/prepare_lm.sh
index 10d5e276aa3..2c3a1aee1da 100755
--- a/egs/iban/s5/local/prepare_lm.sh
+++ b/egs/iban/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/iban/s5/local/train_lms_srilm.sh b/egs/iban/s5/local/train_lms_srilm.sh
index f72596e750a..ecc88611554 100755
--- a/egs/iban/s5/local/train_lms_srilm.sh
+++ b/egs/iban/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/iban/s5/run.sh b/egs/iban/s5/run.sh
index 278a8177c0e..2d08c0ad6fd 100755
--- a/egs/iban/s5/run.sh
+++ b/egs/iban/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Sarah Samson Juan
# Apache 2.0
diff --git a/egs/ifnenit/v1/local/chain/compare_wer.sh b/egs/ifnenit/v1/local/chain/compare_wer.sh
index ff2a766f9e2..2e7d657f330 100755
--- a/egs/ifnenit/v1/local/chain/compare_wer.sh
+++ b/egs/ifnenit/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/ifnenit/v1/local/chain/run_cnn_1a.sh b/egs/ifnenit/v1/local/chain/run_cnn_1a.sh
index b0ecd547741..135d74fa8c9 100755
--- a/egs/ifnenit/v1/local/chain/run_cnn_1a.sh
+++ b/egs/ifnenit/v1/local/chain/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# steps/info/chain_dir_info.pl exp/chainfsf4/cnn1a_1/
# exp/chainfsf4/cnn1a_1/: num-iters=21 nj=2..4 num-params=4.4M dim=40->380 combine=-0.033->-0.025 xent:train/valid[13,20,final]=(-1.07,-1.31,-0.560/-1.30,-1.70,-0.978) logprob:train/valid[13,20,final]=(-0.064,-0.119,-0.011/-0.115,-0.208,-0.096)
diff --git a/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh b/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh
index 7f3132d657e..4ca73450b77 100755
--- a/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh
+++ b/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1b is as chainali_1a except it has 3 more cnn layers and 1 less tdnn layer.
diff --git a/egs/ifnenit/v1/local/ienit_initialize.sh b/egs/ifnenit/v1/local/ienit_initialize.sh
index e9412eb715f..072d85388ba 100755
--- a/egs/ifnenit/v1/local/ienit_initialize.sh
+++ b/egs/ifnenit/v1/local/ienit_initialize.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is originally from qatip project (http://qatsdemo.cloudapp.net/qatip/demo/)
# of Qatar Computing Research Institute (http://qcri.qa/)
diff --git a/egs/ifnenit/v1/local/prepare_data.sh b/egs/ifnenit/v1/local/prepare_data.sh
index ee20822d557..99fce0b239d 100755
--- a/egs/ifnenit/v1/local/prepare_data.sh
+++ b/egs/ifnenit/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
# Creat text, utt2spk, spk2utt, images.scp, and feats.scp for test and train.
diff --git a/egs/ifnenit/v1/local/prepare_dict.sh b/egs/ifnenit/v1/local/prepare_dict.sh
index de0ee6a433c..38b461ba6ad 100755
--- a/egs/ifnenit/v1/local/prepare_dict.sh
+++ b/egs/ifnenit/v1/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/ifnenit/v1/path.sh b/egs/ifnenit/v1/path.sh
index 0d7641cd5c1..85a0ae8e0d0 100755
--- a/egs/ifnenit/v1/path.sh
+++ b/egs/ifnenit/v1/path.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# path to Kaldi's root directory
export KALDI_ROOT=`pwd`/../../..
diff --git a/egs/ifnenit/v1/run.sh b/egs/ifnenit/v1/run.sh
index 2df5a06161c..65cc7adf9a4 100755
--- a/egs/ifnenit/v1/run.sh
+++ b/egs/ifnenit/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
nj=8
diff --git a/egs/librispeech/s5/RESULTS b/egs/librispeech/s5/RESULTS
index b45271765bc..bf67de1d840 100644
--- a/egs/librispeech/s5/RESULTS
+++ b/egs/librispeech/s5/RESULTS
@@ -1,6 +1,6 @@
# In the results below, "tgsmall" is the pruned 3-gram LM, which is used for lattice generation.
# The following language models are then used for rescoring:
-# a) tgmed- slightly less pruned 3-gram LM
+# a) tgmed- slightly less pruned 3-gram LM
# b) tglarge- the full, non-pruned 3-gram LM
# c) fglarge- non-pruned 4-gram LM
#
@@ -337,7 +337,7 @@
%WER 4.39 [ 2387 / 54402, 377 ins, 199 del, 1811 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch3_dev_clean_tglarge/wer_14
%WER 5.36 [ 2918 / 54402, 328 ins, 338 del, 2252 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch3_dev_clean_tgmed/wer_17
%WER 6.08 [ 3305 / 54402, 369 ins, 396 del, 2540 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch3_dev_clean_tgsmall/wer_15
-%WER 4.40 [ 2395 / 54402, 375 ins, 200 del, 1820 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_clean_tglarge/wer_14
+%WER 4.40 [ 2395 / 54402, 375 ins, 200 del, 1820 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_clean_tglarge/wer_14
%WER 5.35 [ 2909 / 54402, 328 ins, 339 del, 2242 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_clean_tgmed/wer_17
%WER 6.05 [ 3291 / 54402, 384 ins, 381 del, 2526 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_clean_tgsmall/wer_14
%WER 13.45 [ 6850 / 50948, 808 ins, 876 del, 5166 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch0_dev_other_tglarge/wer_15
@@ -423,7 +423,7 @@
%WER 17.64 [ 9231 / 52343, 764 ins, 1662 del, 6805 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_other_tgsmall_utt_offline/wer_14
# Results with nnet3 tdnn
-# local/nnet3/run_tdnn.sh
+# local/nnet3/run_tdnn.sh (with old configs, now moved to local/nnet3/tuning/run_tdnn_1a.sh)
# (4 epoch training on speed-perturbed data)
# num_params=19.3M
%WER 4.43 [ 2410 / 54402, 306 ins, 278 del, 1826 sub ] exp/nnet3/tdnn_sp/decode_dev_clean_fglarge/wer_13_1.0
@@ -444,7 +444,7 @@
%WER 16.29 [ 8528 / 52343, 828 ins, 1320 del, 6380 sub ] exp/nnet3/tdnn_sp/decode_test_other_tgsmall/wer_14_0.0
# Results with nnet3 tdnn
-# local/nnet3/run_tdnn.sh
+# local/nnet3/run_tdnn.sh (with old configs, now moved to local/nnet3/tuning/run_tdnn_1a.sh)
# (4 epoch training on speed-perturbed and volumn-perturbed "cleaned" data)
# num_params=19.3M, average training time=68.8s per job(on Tesla K80), real-time factor=1.23161
# for x in exp/nnet3_cleaned/tdnn_sp/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done
@@ -465,7 +465,6 @@
%WER 14.78 [ 7737 / 52343, 807 ins, 1115 del, 5815 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tgmed/wer_15_0.0
%WER 16.28 [ 8521 / 52343, 843 ins, 1258 del, 6420 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tgsmall/wer_14_0.0
-
# Results with nnet3 tdnn+sMBR
# local/nnet3/run_tdnn_discriminative.sh
# a subset of the full list of results (using the acoustic model obtained at the end of the training):
diff --git a/egs/librispeech/s5/local/chain/compare_wer.sh b/egs/librispeech/s5/local/chain/compare_wer.sh
index ec205670b76..dd84279df30 100755
--- a/egs/librispeech/s5/local/chain/compare_wer.sh
+++ b/egs/librispeech/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/librispeech/s5/local/chain/run_chain_common.sh b/egs/librispeech/s5/local/chain/run_chain_common.sh
index da37e148441..710625cf489 100755
--- a/egs/librispeech/s5/local/chain/run_chain_common.sh
+++ b/egs/librispeech/s5/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
diff --git a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh
index 6bf3a139ad1..02214315263 100755
--- a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh
+++ b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "This script has not yet been tested, you would have to comment this statement if you want to run it. Please let us know if you see any issues" && exit 1;
@@ -95,8 +95,8 @@ if [ $frame_subsampling_factor -ne 1 ]; then
data_dirs=
for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
- steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \
- $x $train_data_dir exp/shift_hires mfcc_hires
+ utils/data/shift_feats.sh \
+ $x $train_data_dir ${train_data_dir}_fs$x
utils/fix_data_dir.sh ${train_data_dir}_fs$x
data_dirs="$data_dirs ${train_data_dir}_fs$x"
awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp
diff --git a/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
index db17a35be64..b995ff10b16 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is based on tdnn_1d_sp, but adding cnn as the front-end.
# The cnn-tdnn-f (tdnn_cnn_1a_sp) outperforms the tdnn-f (tdnn_1d_sp).
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
index 48d6ddb804f..d8eedcfad2b 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# run_tdnn_1b.sh's topo is similiar with run_tdnn_1a.sh but we used the xconfigs. Otherwise "frames_per_eg=150,140,100".
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
index 101fd6a4c15..9c7194a2041 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
## Adapted from swbd for librispeech by David van Leeuwen
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
index 865b10dea0c..cae7e0249a1 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# 1d is as 1c but a recipe based on the newer, more compact configs, and with
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index 0e97e46194d..a25078fbbec 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the tdnn-lstmp based on the run_tdnn_lstm_1n.sh under Switchboard.
# training acoustic model and decoding:
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh
index 0da813267fc..27f15581e67 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the tdnn-lstmp based on the run_tdnn_lstm_1a.sh under Librispeech but with larger model size.
# training acoustic model and decoding:
diff --git a/egs/librispeech/s5/local/data_prep.sh b/egs/librispeech/s5/local/data_prep.sh
index 20c5697d61f..c05c3f89109 100755
--- a/egs/librispeech/s5/local/data_prep.sh
+++ b/egs/librispeech/s5/local/data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# 2014 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/librispeech/s5/local/decode_example.sh b/egs/librispeech/s5/local/decode_example.sh
index 815bf17b9f7..3b1b93d90d9 100755
--- a/egs/librispeech/s5/local/decode_example.sh
+++ b/egs/librispeech/s5/local/decode_example.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Guoguo Chen
# Apache 2.0
diff --git a/egs/librispeech/s5/local/download_and_untar.sh b/egs/librispeech/s5/local/download_and_untar.sh
index 1bb6d909edc..5cf6adde8bc 100755
--- a/egs/librispeech/s5/local/download_and_untar.sh
+++ b/egs/librispeech/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# Apache 2.0
diff --git a/egs/librispeech/s5/local/download_lm.sh b/egs/librispeech/s5/local/download_lm.sh
index 382f313df7c..129ca1edbe3 100755
--- a/egs/librispeech/s5/local/download_lm.sh
+++ b/egs/librispeech/s5/local/download_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/format_data.sh b/egs/librispeech/s5/local/format_data.sh
index 71ef6f0e36c..7b5a0823b15 100755
--- a/egs/librispeech/s5/local/format_data.sh
+++ b/egs/librispeech/s5/local/format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/format_lms.sh b/egs/librispeech/s5/local/format_lms.sh
index b530f61d2d9..d1a18bada88 100755
--- a/egs/librispeech/s5/local/format_lms.sh
+++ b/egs/librispeech/s5/local/format_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/g2p.sh b/egs/librispeech/s5/local/g2p.sh
index 5bc934499d9..b3ffa8e19b1 100755
--- a/egs/librispeech/s5/local/g2p.sh
+++ b/egs/librispeech/s5/local/g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/g2p/train_g2p.sh b/egs/librispeech/s5/local/g2p/train_g2p.sh
index 635a382e575..216fc3b5dc0 100755
--- a/egs/librispeech/s5/local/g2p/train_g2p.sh
+++ b/egs/librispeech/s5/local/g2p/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/lm/install_festival.sh b/egs/librispeech/s5/local/lm/install_festival.sh
index d5fa72b6d45..01dacd29b71 100755
--- a/egs/librispeech/s5/local/lm/install_festival.sh
+++ b/egs/librispeech/s5/local/lm/install_festival.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh || exit 1
diff --git a/egs/librispeech/s5/local/lm/normalize_text.sh b/egs/librispeech/s5/local/lm/normalize_text.sh
index e4eed324735..50e58205382 100755
--- a/egs/librispeech/s5/local/lm/normalize_text.sh
+++ b/egs/librispeech/s5/local/lm/normalize_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
@@ -39,10 +39,10 @@ for b in $(cat $in_list); do
[[ -f "$in_file" ]] || { echo "WARNING: $in_file does not exists"; continue; }
out_file=$out_root/$id/$id.txt
mkdir -p $out_root/$id
- $PYTHON local/lm/python/pre_filter.py $in_file /dev/stdout |\
- $PYTHON local/lm/python/text_pre_process.py /dev/stdin /dev/stdout |\
+ python local/lm/python/pre_filter.py $in_file /dev/stdout |\
+ python local/lm/python/text_pre_process.py /dev/stdin /dev/stdout |\
nsw_expand -format opl /dev/stdin |\
- $PYTHON local/lm/python/text_post_process.py /dev/stdin $out_file /dev/null || exit 1
+ python local/lm/python/text_post_process.py /dev/stdin $out_file /dev/null || exit 1
processed=$((processed + 1))
echo "Processing of $id has finished at $(date '+%T %F') [$processed texts ready so far]"
done
diff --git a/egs/librispeech/s5/local/lm/train_lm.sh b/egs/librispeech/s5/local/lm/train_lm.sh
index 6e6ae5970fb..0af3bb0e28c 100755
--- a/egs/librispeech/s5/local/lm/train_lm.sh
+++ b/egs/librispeech/s5/local/lm/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/lookahead/run_lookahead.sh b/egs/librispeech/s5/local/lookahead/run_lookahead.sh
new file mode 100755
index 00000000000..acd123fa8d3
--- /dev/null
+++ b/egs/librispeech/s5/local/lookahead/run_lookahead.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+
+. ./path.sh
+
+# Example script for lookahead composition
+
+lm=tgmed
+am=exp/chain_cleaned/tdnn_1d_sp
+testset=test_clean
+
+# %WER 4.86 [ 2553 / 52576, 315 ins, 222 del, 2016 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead/wer_11_0.0
+# %WER 4.79 [ 2518 / 52576, 279 ins, 292 del, 1947 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_arpa/wer_11_0.0
+# %WER 4.82 [ 2532 / 52576, 286 ins, 290 del, 1956 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_arpa_fast/wer_11_0.0
+# %WER 4.86 [ 2553 / 52576, 314 ins, 222 del, 2017 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_base/wer_11_0.0
+# %WER 4.86 [ 2553 / 52576, 315 ins, 222 del, 2016 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_static/wer_11_0.0
+
+
+# Speed
+#
+# base 0.18 xRT
+# static 0.18 xRT
+# lookahead 0.29 xRT
+# arpa 0.35 xRT
+# arpa_fast 0.21 xRT
+
+# Graph size
+#
+# Base 476 Mb
+# Static 621 Mb
+# Lookahead 48 Mb HCL + 77 Mb Grammar
+# Lookahead + OpenGrm 48 Mb HCL + 42 Mb Grammar
+
+if [ ! -f "${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so" ]; then
+ echo "Missing ${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so"
+ echo "Make sure you compiled openfst with lookahead support. Run make in ${KALDI_ROOT}/tools after git pull."
+ exit 1
+fi
+if [ ! -f "${KALDI_ROOT}/tools/openfst/bin/ngramread" ]; then
+ echo "You appear to not have OpenGRM tools installed. Missing ${KALDI_ROOT}/tools/openfst/bin/ngramread"
+ echo "cd to $KALDI_ROOT/tools and run extras/install_opengrm.sh."
+ exit 1
+fi
+export LD_LIBRARY_PATH=${KALDI_ROOT}/tools/openfst/lib/fst
+
+# Baseline
+utils/format_lm.sh data/lang data/local/lm/lm_${lm}.arpa.gz \
+ data/local/dict/lexicon.txt data/lang_test_${lm}_base
+
+utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov \
+ data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead_base
+
+steps/nnet3/decode.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_base data/${testset}_hires ${am}/decode_${testset}_lookahead_base
+
+utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --remove-oov --compose-graph \
+ data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead
+
+# Decode with statically composed lookahead graph
+steps/nnet3/decode.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead_static
+
+# Decode with runtime composition
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead
+
+# Compile arpa graph
+utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --compose-graph \
+ data/lang_test_${lm}_base ${am} data/local/lm/lm_tgmed.arpa.gz ${am}/graph_${lm}_lookahead_arpa
+
+# Decode with runtime composition
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa
+
+# Decode with runtime composition and tuned beams
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --beam 12.0 --max-active 3000 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa_fast
diff --git a/egs/librispeech/s5/local/nnet2/run_5a_clean_100.sh b/egs/librispeech/s5/local/nnet2/run_5a_clean_100.sh
index b0b46859170..c84072995ac 100755
--- a/egs/librispeech/s5/local/nnet2/run_5a_clean_100.sh
+++ b/egs/librispeech/s5/local/nnet2/run_5a_clean_100.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is p-norm neural net training, with the "fast" script, on top of adapted
# 40-dimensional features.
diff --git a/egs/librispeech/s5/local/nnet2/run_5c.sh b/egs/librispeech/s5/local/nnet2/run_5c.sh
index fefca74b748..bce646615c7 100755
--- a/egs/librispeech/s5/local/nnet2/run_5c.sh
+++ b/egs/librispeech/s5/local/nnet2/run_5c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is neural net training on top of adapted 40-dimensional features.
#
diff --git a/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh b/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh
index 06be974d8ac..1e33e50681c 100755
--- a/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh
+++ b/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is p-norm neural net training, with the "fast" script, on top of adapted
# 40-dimensional features.
diff --git a/egs/librispeech/s5/local/nnet2/run_7a_960.sh b/egs/librispeech/s5/local/nnet2/run_7a_960.sh
index f05ff8d2542..0eb2af6434f 100755
--- a/egs/librispeech/s5/local/nnet2/run_7a_960.sh
+++ b/egs/librispeech/s5/local/nnet2/run_7a_960.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is p-norm neural net training, with the "fast" script, on top of adapted
# 40-dimensional features.
diff --git a/egs/librispeech/s5/local/nnet3/compare_wer.sh b/egs/librispeech/s5/local/nnet3/compare_wer.sh
new file mode 100755
index 00000000000..4a272839687
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/compare_wer.sh
@@ -0,0 +1,152 @@
+#!/usr/bin/env bash
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
+# For use with discriminatively trained systems you specify the epochs after a colon:
+# for instance,
+# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}
+
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: [--looped] [--online] [ ... ]"
+ echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
+ echo "or (with epoch numbers for discriminative training):"
+ echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
+ exit 1
+fi
+
+echo "# $0 $*"
+
+include_looped=false
+if [ "$1" == "--looped" ]; then
+ include_looped=true
+ shift
+fi
+include_online=false
+if [ "$1" == "--online" ]; then
+ include_online=true
+ shift
+fi
+
+
+used_epochs=false
+
+# this function set_names is used to separate the epoch-related parts of the name
+# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like:
+# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
+# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
+# If called with something like:
+# set_names exp/chain/tdnn_d_sp_smbr:3
+# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"
+
+
+set_names() {
+ if [ $# != 1 ]; then
+ echo "compare_wer_general.sh: internal error"
+ exit 1 # exit the program
+ fi
+ dirname=$(echo $1 | cut -d: -f1)
+ epoch=$(echo $1 | cut -s -d: -f2)
+ if [ -z $epoch ]; then
+ epoch_infix=""
+ else
+ used_epochs=true
+ epoch_infix=_epoch${epoch}
+ fi
+}
+
+
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+strings=(
+ "# WER on dev(fglarge) "
+ "# WER on dev(tglarge) "
+ "# WER on dev(tgmed) "
+ "# WER on dev(tgsmall) "
+ "# WER on dev_other(fglarge) "
+ "# WER on dev_other(tglarge) "
+ "# WER on dev_other(tgmed) "
+ "# WER on dev_other(tgsmall) "
+ "# WER on test(fglarge) "
+ "# WER on test(tglarge) "
+ "# WER on test(tgmed) "
+ "# WER on test(tgsmall) "
+ "# WER on test_other(fglarge) "
+ "# WER on test_other(tglarge) "
+ "# WER on test_other(tgmed) "
+ "# WER on test_other(tgsmall) ")
+
+for n in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
+ echo -n "${strings[$n]}"
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ decode_names=(dev_clean_fglarge dev_clean_tglarge dev_clean_tgmed dev_clean_tgsmall dev_other_fglarge dev_other_tglarge dev_other_tgmed dev_other_tgsmall test_clean_fglarge test_clean_tglarge test_clean_tgmed test_clean_tgsmall test_other_fglarge test_other_tglarge test_other_tgmed test_other_tgsmall)
+
+ wer=$(grep WER $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ if $include_looped; then
+ echo -n "# [looped:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(grep WER $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+ if $include_online; then
+ echo -n "# [online:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(grep WER ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+done
+
+
+if $used_epochs; then
+ exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi
+
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep -v likelihood | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep -v likelihood | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (logLL) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep -w likelihood | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (logLL) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep -w likelihood | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Num-parameters "
+for x in $*; do
+ num_params=$(grep num-parameters $x/log/progress.1.log | awk '{print $2}')
+ printf "% 10d" $num_params
+done
+echo
diff --git a/egs/librispeech/s5/local/nnet3/run_ivector_common.sh b/egs/librispeech/s5/local/nnet3/run_ivector_common.sh
index b937232eb8d..1e1d4aef901 100755
--- a/egs/librispeech/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/librispeech/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn.sh b/egs/librispeech/s5/local/nnet3/run_tdnn.sh
deleted file mode 100755
index 28ee2b92004..00000000000
--- a/egs/librispeech/s5/local/nnet3/run_tdnn.sh
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/bin/bash
-
-# this is the standard "tdnn" system, built in nnet3; it's what we use to
-# call multi-splice.
-
-# without cleanup:
-# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &
-
-
-# At this script level we don't support not running on GPU, as it would be painfully slow.
-# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
-# --num-threads 16 and --minibatch-size 128.
-
-# First the options that are passed through to run_ivector_common.sh
-# (some of which are also used in this script directly).
-stage=0
-decode_nj=30
-train_set=train_960_cleaned
-gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it
- # should have alignments for the specified training data.
-nnet3_affix=_cleaned
-
-# Options which are not passed through to run_ivector_common.sh
-affix=
-train_stage=-10
-common_egs_dir=
-reporting_email=
-remove_egs=true
-
-. ./cmd.sh
-. ./path.sh
-. ./utils/parse_options.sh
-
-
-if ! cuda-compiled; then
- cat </dev/null || true
- for test in test_clean test_other dev_clean dev_other; do
- (
- steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
- --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
- ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
- steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
- data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
- steps/lmrescore_const_arpa.sh \
- --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
- data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
- steps/lmrescore_const_arpa.sh \
- --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
- data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
- ) || touch $dir/.error &
- done
- wait
- [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
-fi
-
-exit 0;
diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn.sh b/egs/librispeech/s5/local/nnet3/run_tdnn.sh
new file mode 120000
index 00000000000..61f8f499182
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1b.sh
\ No newline at end of file
diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh
index f283d69e7fe..d4ecd6ea0f0 100755
--- a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh
+++ b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "This script has not yet been tested, you would have to comment this statement if you want to run it. Please let us know if you see any issues" && exit 1;
diff --git a/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1a.sh b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..e4c98ccef12
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1a.sh
@@ -0,0 +1,127 @@
+#!/usr/bin/env bash
+
+# this is the standard "tdnn" system, built in nnet3; it's what we use to
+# call multi-splice.
+
+# without cleanup:
+# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &
+
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=30
+train_set=train_960_cleaned
+gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it
+ # should have alignments for the specified training data.
+nnet3_affix=_cleaned
+
+# Options which are not passed through to run_ivector_common.sh
+affix=
+train_stage=-10
+common_egs_dir=
+reporting_email=
+remove_egs=true
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat </dev/null || true
+ for test in test_clean test_other dev_clean dev_other; do
+ (
+ steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
+ ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
+ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+exit 0;
diff --git a/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1b.sh b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1b.sh
new file mode 100755
index 00000000000..bb695d8bd79
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1b.sh
@@ -0,0 +1,163 @@
+#!/usr/bin/env bash
+
+# 1b is as 1a but uses xconfigs.
+
+# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_sp
+# System tdnn_sp
+# WER on dev(fglarge) 4.52
+# WER on dev(tglarge) 4.80
+# WER on dev(tgmed) 6.02
+# WER on dev(tgsmall) 6.80
+# WER on dev_other(fglarge) 12.54
+# WER on dev_other(tglarge) 13.16
+# WER on dev_other(tgmed) 15.51
+# WER on dev_other(tgsmall) 17.12
+# WER on test(fglarge) 5.00
+# WER on test(tglarge) 5.22
+# WER on test(tgmed) 6.40
+# WER on test(tgsmall) 7.14
+# WER on test_other(fglarge) 12.56
+# WER on test_other(tglarge) 13.04
+# WER on test_other(tgmed) 15.58
+# WER on test_other(tgsmall) 16.88
+# Final train prob 0.7180
+# Final valid prob 0.7003
+# Final train prob (logLL) -0.9483
+# Final valid prob (logLL) -0.9963
+# Num-parameters 19268504
+
+
+# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn_sp
+# exp/nnet3_cleaned/tdnn_sp/: num-iters=1088 nj=3..16 num-params=19.3M dim=40+100->5784 combine=-0.94->-0.93 (over 7) loglike:train/valid[723,1087,combined]=(-0.99,-0.95,-0.95/-1.02,-0.99,-1.00) accuracy:train/valid[723,1087,combined]=(0.710,0.721,0.718/0.69,0.70,0.700)
+
+# this is the standard "tdnn" system, built in nnet3; it's what we use to
+# call multi-splice.
+
+# without cleanup:
+# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &
+
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=30
+train_set=train_960_cleaned
+gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it
+ # should have alignments for the specified training data.
+nnet3_affix=_cleaned
+
+# Options which are not passed through to run_ivector_common.sh
+affix=
+train_stage=-10
+common_egs_dir=
+reporting_email=
+remove_egs=true
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-layer name=tdnn0 dim=1280
+ relu-batchnorm-layer name=tdnn1 dim=1280 input=Append(-1,2)
+ relu-batchnorm-layer name=tdnn2 dim=1280 input=Append(-3,3)
+ relu-batchnorm-layer name=tdnn3 dim=1280 input=Append(-7,2)
+ relu-batchnorm-layer name=tdnn4 dim=1280
+ output-layer name=output input=tdnn4 dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+ --config-dir $dir/configs || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_dnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.num-epochs 4 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 16 \
+ --trainer.optimization.initial-effective-lrate 0.0017 \
+ --trainer.optimization.final-effective-lrate 0.00017 \
+ --egs.dir "$common_egs_dir" \
+ --cleanup.remove-egs $remove_egs \
+ --cleanup.preserve-model-interval 100 \
+ --feat-dir=$train_data_dir \
+ --ali-dir $ali_dir \
+ --lang data/lang \
+ --reporting.email="$reporting_email" \
+ --dir=$dir || exit 1;
+
+fi
+
+if [ $stage -le 13 ]; then
+ # this does offline decoding that should give about the same results as the
+ # real online decoding (the one with --per-utt true)
+ rm $dir/.error 2>/dev/null || true
+ for test in test_clean test_other dev_clean dev_other; do
+ (
+ steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
+ ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
+ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+exit 0;
diff --git a/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1c.sh b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1c.sh
new file mode 100755
index 00000000000..2f1ceb7595e
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1c.sh
@@ -0,0 +1,177 @@
+#!/usr/bin/env bash
+
+# 1c is as 1b, but uses more modern TDNN configuration.
+
+# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_sp exp/nnet3_cleaned/tdnn_1c_sp
+# System tdnn_sp tdnn_1c_sp
+# WER on dev(fglarge) 4.52 4.20
+# WER on dev(tglarge) 4.80 4.37
+# WER on dev(tgmed) 6.02 5.31
+# WER on dev(tgsmall) 6.80 5.86
+# WER on dev_other(fglarge) 12.54 12.55
+# WER on dev_other(tglarge) 13.16 13.00
+# WER on dev_other(tgmed) 15.51 14.98
+# WER on dev_other(tgsmall) 17.12 15.88
+# WER on test(fglarge) 5.00 4.91
+# WER on test(tglarge) 5.22 4.99
+# WER on test(tgmed) 6.40 5.93
+# WER on test(tgsmall) 7.14 6.49
+# WER on test_other(fglarge) 12.56 12.94
+# WER on test_other(tglarge) 13.04 13.38
+# WER on test_other(tgmed) 15.58 15.11
+# WER on test_other(tgsmall) 16.88 16.28
+# Final train prob 0.7180 0.8509
+# Final valid prob 0.7003 0.8157
+# Final train prob (logLL) -0.9483 -0.4294
+# Final valid prob (logLL) -0.9963 -0.5662
+# Num-parameters 19268504 18391704
+
+# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn_1c_sp
+# exp/nnet3_cleaned/tdnn_1c_sp: num-iters=1088 nj=3..16 num-params=18.4M dim=40+100->5784 combine=-0.43->-0.43 (over 4) loglike:train/valid[723,1087,combined]=(-0.48,-0.43,-0.43/-0.58,-0.57,-0.57) accuracy:train/valid[723,1087,combined]=(0.840,0.854,0.851/0.811,0.816,0.816)
+
+# this is the standard "tdnn" system, built in nnet3; it's what we use to
+# call multi-splice.
+
+# without cleanup:
+# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &
+
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=30
+train_set=train_960_cleaned
+gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it
+ # should have alignments for the specified training data.
+nnet3_affix=_cleaned
+
+# Options which are not passed through to run_ivector_common.sh
+affix=
+train_stage=-10
+common_egs_dir=
+reporting_email=
+remove_egs=true
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ linear-component name=prefinal-l dim=256 $linear_opts
+
+ prefinal-layer name=prefinal input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output input=prefinal dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+ --config-dir $dir/configs || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_dnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.num-epochs 4 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 16 \
+ --trainer.optimization.initial-effective-lrate 0.0017 \
+ --trainer.optimization.final-effective-lrate 0.00017 \
+ --egs.dir "$common_egs_dir" \
+ --cleanup.remove-egs $remove_egs \
+ --cleanup.preserve-model-interval 100 \
+ --feat-dir=$train_data_dir \
+ --ali-dir $ali_dir \
+ --lang data/lang \
+ --reporting.email="$reporting_email" \
+ --dir=$dir || exit 1;
+
+fi
+
+if [ $stage -le 13 ]; then
+ # this does offline decoding that should give about the same results as the
+ # real online decoding (the one with --per-utt true)
+ rm $dir/.error 2>/dev/null || true
+ for test in test_clean test_other dev_clean dev_other; do
+ (
+ steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
+ ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
+ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+exit 0;
diff --git a/egs/librispeech/s5/local/online/run_nnet2.sh b/egs/librispeech/s5/local/online/run_nnet2.sh
index 5e0a616906f..b7dbe22a8d5 100755
--- a/egs/librispeech/s5/local/online/run_nnet2.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# example script for online-nnet2 system training and decoding,
# based on the one for fisher-English.
diff --git a/egs/librispeech/s5/local/online/run_nnet2_common.sh b/egs/librispeech/s5/local/online/run_nnet2_common.sh
index 1813233e7d0..9666e6b26b9 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_common.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script contains some common (shared) parts of the run_nnet*.sh scripts.
diff --git a/egs/librispeech/s5/local/online/run_nnet2_disc.sh b/egs/librispeech/s5/local/online/run_nnet2_disc.sh
index 65a661a21fe..108b1717871 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_disc.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script does discriminative training on top of the online,
diff --git a/egs/librispeech/s5/local/online/run_nnet2_ms.sh b/egs/librispeech/s5/local/online/run_nnet2_ms.sh
index c74de372fcc..7fa3f179fd6 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_ms.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the "multi-splice" version of the online-nnet2 training script.
# It's currently the best recipe.
diff --git a/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh b/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh
index f20d3230cc2..6c52db7831b 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script does discriminative training on top of the online, multi-splice
diff --git a/egs/librispeech/s5/local/online_pitch/run_nnet2_common.sh b/egs/librispeech/s5/local/online_pitch/run_nnet2_common.sh
index c5fdd6033f6..32e03f922ae 100755
--- a/egs/librispeech/s5/local/online_pitch/run_nnet2_common.sh
+++ b/egs/librispeech/s5/local/online_pitch/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script contains some common (shared) parts of the run_nnet*.sh scripts.
diff --git a/egs/librispeech/s5/local/online_pitch/run_nnet2_ms.sh b/egs/librispeech/s5/local/online_pitch/run_nnet2_ms.sh
index ca7b6139e0b..3509e0d2eed 100755
--- a/egs/librispeech/s5/local/online_pitch/run_nnet2_ms.sh
+++ b/egs/librispeech/s5/local/online_pitch/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the "multi-splice" version of the online-nnet2 training script,
# with pitch.
diff --git a/egs/librispeech/s5/local/prepare_dict.sh b/egs/librispeech/s5/local/prepare_dict.sh
index f9efb2ee46b..7b345b6bf1c 100755
--- a/egs/librispeech/s5/local/prepare_dict.sh
+++ b/egs/librispeech/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/prepare_example_data.sh b/egs/librispeech/s5/local/prepare_example_data.sh
index 169aaea52a3..fc010dcc907 100755
--- a/egs/librispeech/s5/local/prepare_example_data.sh
+++ b/egs/librispeech/s5/local/prepare_example_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Guoguo Chen
# Apache 2.0
diff --git a/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh b/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh
index 137a972f3d9..b0eace29773 100755
--- a/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
# 2018 Ke Li
diff --git a/egs/librispeech/s5/local/run_cleanup_segmentation.sh b/egs/librispeech/s5/local/run_cleanup_segmentation.sh
index c67d76a0096..e88b4e44619 100755
--- a/egs/librispeech/s5/local/run_cleanup_segmentation.sh
+++ b/egs/librispeech/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Yiming Wang
diff --git a/egs/librispeech/s5/local/run_data_cleaning.sh b/egs/librispeech/s5/local/run_data_cleaning.sh
index 3300ad4c4a1..f856ce1154e 100755
--- a/egs/librispeech/s5/local/run_data_cleaning.sh
+++ b/egs/librispeech/s5/local/run_data_cleaning.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script shows how you can do data-cleaning, and exclude data that has a
diff --git a/egs/librispeech/s5/local/run_nnet2.sh b/egs/librispeech/s5/local/run_nnet2.sh
index f816e41af6e..be50a1c8608 100755
--- a/egs/librispeech/s5/local/run_nnet2.sh
+++ b/egs/librispeech/s5/local/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
train_set="train-clean-100"
test_sets="dev-clean dev-other"
diff --git a/egs/librispeech/s5/local/run_nnet2_clean_100.sh b/egs/librispeech/s5/local/run_nnet2_clean_100.sh
index 091bf2ab237..d64f6861395 100755
--- a/egs/librispeech/s5/local/run_nnet2_clean_100.sh
+++ b/egs/librispeech/s5/local/run_nnet2_clean_100.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. utils/parse_options.sh
. ./cmd.sh
diff --git a/egs/librispeech/s5/local/run_nnet2_clean_460.sh b/egs/librispeech/s5/local/run_nnet2_clean_460.sh
index 67d40eab629..fb5ecfbea28 100755
--- a/egs/librispeech/s5/local/run_nnet2_clean_460.sh
+++ b/egs/librispeech/s5/local/run_nnet2_clean_460.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. utils/parse_options.sh
. ./cmd.sh
diff --git a/egs/librispeech/s5/local/run_rnnlm.sh b/egs/librispeech/s5/local/run_rnnlm.sh
index ebb1832040b..819a2fafd61 100755
--- a/egs/librispeech/s5/local/run_rnnlm.sh
+++ b/egs/librispeech/s5/local/run_rnnlm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Yandex (Author: Ilya Edrenkin)
# Apache 2.0
diff --git a/egs/librispeech/s5/local/score.sh b/egs/librispeech/s5/local/score.sh
index c812199fc98..cb5bbb7277b 100755
--- a/egs/librispeech/s5/local/score.sh
+++ b/egs/librispeech/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2014 Guoguo Chen
# Apache 2.0
diff --git a/egs/librispeech/s5/run.sh b/egs/librispeech/s5/run.sh
index f784a8972db..86907413503 100755
--- a/egs/librispeech/s5/run.sh
+++ b/egs/librispeech/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set this to somewhere where you want to put your data, or where
diff --git a/egs/lre/v1/lid/extract_ivectors.sh b/egs/lre/v1/lid/extract_ivectors.sh
index 4a5bf5650cc..a6fb4953332 100755
--- a/egs/lre/v1/lid/extract_ivectors.sh
+++ b/egs/lre/v1/lid/extract_ivectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre/v1/lid/get_vtln_warps.sh b/egs/lre/v1/lid/get_vtln_warps.sh
index 72a8fb33200..94f06bb6b80 100755
--- a/egs/lre/v1/lid/get_vtln_warps.sh
+++ b/egs/lre/v1/lid/get_vtln_warps.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Daniel Povey
# Apache 2.0
diff --git a/egs/lre/v1/lid/train_diag_ubm.sh b/egs/lre/v1/lid/train_diag_ubm.sh
index 8ba703073c0..52a947b7a39 100755
--- a/egs/lre/v1/lid/train_diag_ubm.sh
+++ b/egs/lre/v1/lid/train_diag_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2013 Daniel Povey
diff --git a/egs/lre/v1/lid/train_full_ubm.sh b/egs/lre/v1/lid/train_full_ubm.sh
index 4511d0985fa..7fc25e86667 100755
--- a/egs/lre/v1/lid/train_full_ubm.sh
+++ b/egs/lre/v1/lid/train_full_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre/v1/lid/train_ivector_extractor.sh b/egs/lre/v1/lid/train_ivector_extractor.sh
index eda607a9eff..17f87600892 100755
--- a/egs/lre/v1/lid/train_ivector_extractor.sh
+++ b/egs/lre/v1/lid/train_ivector_extractor.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre/v1/lid/train_lvtln_model.sh b/egs/lre/v1/lid/train_lvtln_model.sh
index 35e6968e4e6..015e80ea62c 100755
--- a/egs/lre/v1/lid/train_lvtln_model.sh
+++ b/egs/lre/v1/lid/train_lvtln_model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Daniel Povey
# Apache 2.0
diff --git a/egs/lre/v1/local/split_long_utts.sh b/egs/lre/v1/local/split_long_utts.sh
index 083954ea288..f8df872d77b 100755
--- a/egs/lre/v1/local/split_long_utts.sh
+++ b/egs/lre/v1/local/split_long_utts.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
max_utt_len=60 # 60 seconds.
stage=0
diff --git a/egs/lre/v1/run.sh b/egs/lre/v1/run.sh
index 9818a8aa5f7..6941196f94a 100755
--- a/egs/lre/v1/run.sh
+++ b/egs/lre/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder
# 2014 Daniel Povey
# Apache 2.0.
diff --git a/egs/lre/v1/run_logistic_regression.sh b/egs/lre/v1/run_logistic_regression.sh
index d39dc3353c2..d19d0d29638 100755
--- a/egs/lre/v1/run_logistic_regression.sh
+++ b/egs/lre/v1/run_logistic_regression.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder, Daniel Povey
# Apache 2.0.
#
diff --git a/egs/lre07/v1/lid/extract_ivectors.sh b/egs/lre07/v1/lid/extract_ivectors.sh
index b1d745dda32..0ed3bbd53d8 100755
--- a/egs/lre07/v1/lid/extract_ivectors.sh
+++ b/egs/lre07/v1/lid/extract_ivectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre07/v1/lid/extract_ivectors_dnn.sh b/egs/lre07/v1/lid/extract_ivectors_dnn.sh
index f2c3f2697d3..56b8d339dc0 100755
--- a/egs/lre07/v1/lid/extract_ivectors_dnn.sh
+++ b/egs/lre07/v1/lid/extract_ivectors_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014-2015 David Snyder
diff --git a/egs/lre07/v1/lid/get_vtln_warps.sh b/egs/lre07/v1/lid/get_vtln_warps.sh
index 72a8fb33200..94f06bb6b80 100755
--- a/egs/lre07/v1/lid/get_vtln_warps.sh
+++ b/egs/lre07/v1/lid/get_vtln_warps.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Daniel Povey
# Apache 2.0
diff --git a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh
index 45e32477cfc..334a88551a3 100755
--- a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh
+++ b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2015 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/lre07/v1/lid/nnet2/get_egs2.sh b/egs/lre07/v1/lid/nnet2/get_egs2.sh
index 78f4b5a5101..3edf37e97d7 100755
--- a/egs/lre07/v1/lid/nnet2/get_egs2.sh
+++ b/egs/lre07/v1/lid/nnet2/get_egs2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey).
# 2015 David Snyder
diff --git a/egs/lre07/v1/lid/nnet2/get_lda.sh b/egs/lre07/v1/lid/nnet2/get_lda.sh
index a8b0a87fa22..a030abbda2b 100755
--- a/egs/lre07/v1/lid/nnet2/get_lda.sh
+++ b/egs/lre07/v1/lid/nnet2/get_lda.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey).
# 2015 David Snyder
diff --git a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh
index 533001934ab..48fd79556cb 100755
--- a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh
+++ b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey).
# 2013 Xiaohui Zhang
diff --git a/egs/lre07/v1/lid/run_logistic_regression.sh b/egs/lre07/v1/lid/run_logistic_regression.sh
index 9caeda2423f..3ce16e40297 100755
--- a/egs/lre07/v1/lid/run_logistic_regression.sh
+++ b/egs/lre07/v1/lid/run_logistic_regression.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder, Daniel Povey
# Apache 2.0.
#
diff --git a/egs/lre07/v1/lid/train_diag_ubm.sh b/egs/lre07/v1/lid/train_diag_ubm.sh
index a5e256818ce..3092f1c559b 100755
--- a/egs/lre07/v1/lid/train_diag_ubm.sh
+++ b/egs/lre07/v1/lid/train_diag_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2013 Daniel Povey
diff --git a/egs/lre07/v1/lid/train_full_ubm.sh b/egs/lre07/v1/lid/train_full_ubm.sh
index 4511d0985fa..7fc25e86667 100755
--- a/egs/lre07/v1/lid/train_full_ubm.sh
+++ b/egs/lre07/v1/lid/train_full_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre07/v1/lid/train_ivector_extractor.sh b/egs/lre07/v1/lid/train_ivector_extractor.sh
index 55bd54bb275..6bdc23c08e2 100755
--- a/egs/lre07/v1/lid/train_ivector_extractor.sh
+++ b/egs/lre07/v1/lid/train_ivector_extractor.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh
index 72c7e486273..19e1315d7f0 100755
--- a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh
+++ b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014-2015 David Snyder
diff --git a/egs/lre07/v1/lid/train_lvtln_model.sh b/egs/lre07/v1/lid/train_lvtln_model.sh
index 77dd8c4bb5a..531e18bc246 100755
--- a/egs/lre07/v1/lid/train_lvtln_model.sh
+++ b/egs/lre07/v1/lid/train_lvtln_model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Daniel Povey
# Apache 2.0
diff --git a/egs/lre07/v1/local/lre07_eval/lre07_eval.sh b/egs/lre07/v1/local/lre07_eval/lre07_eval.sh
index e12ddccece0..fe185a1c397 100755
--- a/egs/lre07/v1/local/lre07_eval/lre07_eval.sh
+++ b/egs/lre07/v1/local/lre07_eval/lre07_eval.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder
# Apache 2.0.
#
diff --git a/egs/lre07/v1/local/split_long_utts.sh b/egs/lre07/v1/local/split_long_utts.sh
index 083954ea288..f8df872d77b 100755
--- a/egs/lre07/v1/local/split_long_utts.sh
+++ b/egs/lre07/v1/local/split_long_utts.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
max_utt_len=60 # 60 seconds.
stage=0
diff --git a/egs/lre07/v1/run.sh b/egs/lre07/v1/run.sh
index ca9f3df41bb..984b1982c6a 100755
--- a/egs/lre07/v1/run.sh
+++ b/egs/lre07/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014-2015 David Snyder
# Daniel Povey
# Apache 2.0.
diff --git a/egs/lre07/v2/local/dnn/fisher_data_prep.sh b/egs/lre07/v2/local/dnn/fisher_data_prep.sh
index 771c868064d..fc96c491f51 100755
--- a/egs/lre07/v2/local/dnn/fisher_data_prep.sh
+++ b/egs/lre07/v2/local/dnn/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/lre07/v2/local/dnn/fisher_prepare_dict.sh b/egs/lre07/v2/local/dnn/fisher_prepare_dict.sh
index 1ffa4928fd7..a4f3f3c2ca1 100755
--- a/egs/lre07/v2/local/dnn/fisher_prepare_dict.sh
+++ b/egs/lre07/v2/local/dnn/fisher_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/lre07/v2/local/dnn/fisher_train_lms.sh b/egs/lre07/v2/local/dnn/fisher_train_lms.sh
index 354882a3760..3497da32213 100755
--- a/egs/lre07/v2/local/dnn/fisher_train_lms.sh
+++ b/egs/lre07/v2/local/dnn/fisher_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/lre07/v2/local/dnn/remove_dup_utts.sh b/egs/lre07/v2/local/dnn/remove_dup_utts.sh
index 1211e0e04fd..f40a7b781c8 100755
--- a/egs/lre07/v2/local/dnn/remove_dup_utts.sh
+++ b/egs/lre07/v2/local/dnn/remove_dup_utts.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Remove excess utterances once they appear more than a specified
# number of times with the same transcription, in a data set.
diff --git a/egs/lre07/v2/local/dnn/run_nnet2_common.sh b/egs/lre07/v2/local/dnn/run_nnet2_common.sh
index 2d0703b51c0..032282c11ef 100755
--- a/egs/lre07/v2/local/dnn/run_nnet2_common.sh
+++ b/egs/lre07/v2/local/dnn/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Make the features.
diff --git a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh
index 699cbe60542..c8e818e45a5 100755
--- a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh
+++ b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_nnet2_multisplice.sh in
# egs/fisher_english/s5/local/online. It has been modified
diff --git a/egs/lre07/v2/local/dnn/train_dnn.sh b/egs/lre07/v2/local/dnn/train_dnn.sh
index dd2469b4009..b779f20d8a0 100755
--- a/egs/lre07/v2/local/dnn/train_dnn.sh
+++ b/egs/lre07/v2/local/dnn/train_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on egs/fisher_english/s5/run.sh. It trains a
# multisplice time-delay neural network used in the DNN-based speaker
diff --git a/egs/lre07/v2/local/lre07_eval/lre07_eval.sh b/egs/lre07/v2/local/lre07_eval/lre07_eval.sh
index e12ddccece0..fe185a1c397 100755
--- a/egs/lre07/v2/local/lre07_eval/lre07_eval.sh
+++ b/egs/lre07/v2/local/lre07_eval/lre07_eval.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder
# Apache 2.0.
#
diff --git a/egs/lre07/v2/local/split_long_utts.sh b/egs/lre07/v2/local/split_long_utts.sh
index 083954ea288..f8df872d77b 100755
--- a/egs/lre07/v2/local/split_long_utts.sh
+++ b/egs/lre07/v2/local/split_long_utts.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
max_utt_len=60 # 60 seconds.
stage=0
diff --git a/egs/lre07/v2/run.sh b/egs/lre07/v2/run.sh
index e81dd869cc6..f8693234454 100755
--- a/egs/lre07/v2/run.sh
+++ b/egs/lre07/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-2017 Go-Vivace Inc. (Author: Mousmita Sarma)
#
# Apache 2.0.
diff --git a/egs/madcat_ar/v1/local/chain/compare_wer.sh b/egs/madcat_ar/v1/local/chain/compare_wer.sh
index 7f04061dafb..01e403e8ba9 100755
--- a/egs/madcat_ar/v1/local/chain/compare_wer.sh
+++ b/egs/madcat_ar/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh
index 892ee441516..7478c5acedb 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2017 Chun Chieh Chang
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh
index 7ca7c652fd2..047893b8659 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
index a8bc1836ffe..7fbd52b5965 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1a is the same as chainali_1c but uses the e2e chain model to get the
# lattice alignments and to build a tree
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
index 0828e051dcc..dfd0ad40bfc 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the
# lattice alignments and to build a tree
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index 3caf8ae4494..ad5f3cd4879 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh
index 9fe588f31b8..dd802417f4f 100755
--- a/egs/madcat_ar/v1/local/extract_features.sh
+++ b/egs/madcat_ar/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/madcat_ar/v1/local/extract_lines.sh b/egs/madcat_ar/v1/local/extract_lines.sh
index ab87836ae3a..c4b5e77a4b2 100755
--- a/egs/madcat_ar/v1/local/extract_lines.sh
+++ b/egs/madcat_ar/v1/local/extract_lines.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Ashish Arora
nj=4
diff --git a/egs/madcat_ar/v1/local/prepare_data.sh b/egs/madcat_ar/v1/local/prepare_data.sh
index 1049db9826d..8a6fa428807 100755
--- a/egs/madcat_ar/v1/local/prepare_data.sh
+++ b/egs/madcat_ar/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/madcat_ar/v1/local/score.sh b/egs/madcat_ar/v1/local/score.sh
index 31564d25326..e1befafd8b2 100755
--- a/egs/madcat_ar/v1/local/score.sh
+++ b/egs/madcat_ar/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/madcat_ar/v1/local/tl/augment_data.sh b/egs/madcat_ar/v1/local/tl/augment_data.sh
index cc44aa58a62..6f2d3cc0217 100755
--- a/egs/madcat_ar/v1/local/tl/augment_data.sh
+++ b/egs/madcat_ar/v1/local/tl/augment_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh
index ccbb7119674..ec73d4dd406 100755
--- a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh
+++ b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# ./local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a/
# System cnn_e2eali_1a
diff --git a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh
index 3fca8cf5fdc..ca7fef9eb85 100755
--- a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh
+++ b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/madcat_ar/v1/local/tl/run_text_localization.sh b/egs/madcat_ar/v1/local/tl/run_text_localization.sh
index 8d12f7d802f..24269d9f479 100755
--- a/egs/madcat_ar/v1/local/tl/run_text_localization.sh
+++ b/egs/madcat_ar/v1/local/tl/run_text_localization.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/madcat_ar/v1/local/tl/train_lm.sh b/egs/madcat_ar/v1/local/tl/train_lm.sh
index 524bb2e9f40..c37fe64569f 100755
--- a/egs/madcat_ar/v1/local/tl/train_lm.sh
+++ b/egs/madcat_ar/v1/local/tl/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/madcat_ar/v1/local/train_lm.sh b/egs/madcat_ar/v1/local/train_lm.sh
index 903b288a834..c53a6fa8f35 100755
--- a/egs/madcat_ar/v1/local/train_lm.sh
+++ b/egs/madcat_ar/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/madcat_ar/v1/run.sh b/egs/madcat_ar/v1/run.sh
index 01bfdbed543..076f34ced2b 100755
--- a/egs/madcat_ar/v1/run.sh
+++ b/egs/madcat_ar/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh
index 62f4eeb7c71..837c482af0f 100755
--- a/egs/madcat_ar/v1/run_end2end.sh
+++ b/egs/madcat_ar/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2018 Ashish Arora
set -e
diff --git a/egs/madcat_zh/v1/local/chain/compare_wer.sh b/egs/madcat_zh/v1/local/chain/compare_wer.sh
index 4eb665fc702..2e2bc73e01c 100755
--- a/egs/madcat_zh/v1/local/chain/compare_wer.sh
+++ b/egs/madcat_zh/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh
index 164d62a7ad9..17bae9941c4 100755
--- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh
+++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2017 Chun Chieh Chang
diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh
index be51bdcc3d1..7b15b89a549 100755
--- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh
+++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1a is as 1a except it uses chain alignments (using 1a system) instead of gmm alignments
diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh
index aa61620a92f..e14dbda0c39 100755
--- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh
+++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1b is as chainali_1a except it has 3 more cnn layers and 1 less tdnn layer.
# ./local/chain/compare_wer.sh exp/chain/cnn_chainali_1a/ exp/chain/cnn_chainali_1b/
diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index ffc9a4c8a14..037fcbaaad2 100755
--- a/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a
diff --git a/egs/madcat_zh/v1/local/extract_features.sh b/egs/madcat_zh/v1/local/extract_features.sh
index 9fe588f31b8..dd802417f4f 100755
--- a/egs/madcat_zh/v1/local/extract_features.sh
+++ b/egs/madcat_zh/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/madcat_zh/v1/local/extract_lines.sh b/egs/madcat_zh/v1/local/extract_lines.sh
index ed752e97e13..391f51a9ea9 100755
--- a/egs/madcat_zh/v1/local/extract_lines.sh
+++ b/egs/madcat_zh/v1/local/extract_lines.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Ashish Arora
nj=4
diff --git a/egs/madcat_zh/v1/local/prepare_data.sh b/egs/madcat_zh/v1/local/prepare_data.sh
index ba35b90b173..33086111426 100755
--- a/egs/madcat_zh/v1/local/prepare_data.sh
+++ b/egs/madcat_zh/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/madcat_zh/v1/local/score.sh b/egs/madcat_zh/v1/local/score.sh
index 31564d25326..e1befafd8b2 100755
--- a/egs/madcat_zh/v1/local/score.sh
+++ b/egs/madcat_zh/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/madcat_zh/v1/local/train_lm.sh b/egs/madcat_zh/v1/local/train_lm.sh
index a8e2dc71f28..d37c8ef110a 100755
--- a/egs/madcat_zh/v1/local/train_lm.sh
+++ b/egs/madcat_zh/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/madcat_zh/v1/run.sh b/egs/madcat_zh/v1/run.sh
index b3ef370c830..c9dba0443ec 100755
--- a/egs/madcat_zh/v1/run.sh
+++ b/egs/madcat_zh/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/madcat_zh/v1/run_end2end.sh b/egs/madcat_zh/v1/run_end2end.sh
index 7e0fc1e25d1..eefb19a85b6 100755
--- a/egs/madcat_zh/v1/run_end2end.sh
+++ b/egs/madcat_zh/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
set -e
diff --git a/egs/malach/s5/local/chain/compare_wer_general.sh b/egs/malach/s5/local/chain/compare_wer_general.sh
index 9bd017414ab..7d36c298396 100755
--- a/egs/malach/s5/local/chain/compare_wer_general.sh
+++ b/egs/malach/s5/local/chain/compare_wer_general.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo -n "System "
for x in $*; do printf " % 10s" $x; done
diff --git a/egs/malach/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/malach/s5/local/chain/tuning/run_tdnn_1a.sh
index 007e94ef1a3..69995f0b8aa 100644
--- a/egs/malach/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/malach/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 IBM Corp. (Author: Michael Picheny) Adapted AMI recipe to MALACH corpus
diff --git a/egs/malach/s5/local/malach_data_prep.sh b/egs/malach/s5/local/malach_data_prep.sh
index 174adf9ce0e..4b812e7ee39 100755
--- a/egs/malach/s5/local/malach_data_prep.sh
+++ b/egs/malach/s5/local/malach_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
# 2016 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/malach/s5/local/malach_prepare_dict.sh b/egs/malach/s5/local/malach_prepare_dict.sh
index 4c3c039f74a..e4638b7693e 100755
--- a/egs/malach/s5/local/malach_prepare_dict.sh
+++ b/egs/malach/s5/local/malach_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#adapted from fisher dict preparation script, Author: Pawel Swietojanski
# Copyright 2019 IBM Corp. (Author: Michael Picheny) Adapted AMI recipe to MALACH corpus
diff --git a/egs/malach/s5/local/malach_scoring_data_prep.sh b/egs/malach/s5/local/malach_scoring_data_prep.sh
index 8c9c79a1fd6..1b1b1c83492 100755
--- a/egs/malach/s5/local/malach_scoring_data_prep.sh
+++ b/egs/malach/s5/local/malach_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/malach/s5/local/malach_text_prep.sh b/egs/malach/s5/local/malach_text_prep.sh
index 55885c66ce9..fcb8d17fb18 100755
--- a/egs/malach/s5/local/malach_text_prep.sh
+++ b/egs/malach/s5/local/malach_text_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 IBM Corp. (Author: Michael Picheny) Adapted AMI recipe to MALACH corpus
# Copyright 2015, Brno University of Technology (Author: Karel Vesely)
diff --git a/egs/malach/s5/local/malach_train_lms.sh b/egs/malach/s5/local/malach_train_lms.sh
index c4919022bf1..722ba4cfffd 100755
--- a/egs/malach/s5/local/malach_train_lms.sh
+++ b/egs/malach/s5/local/malach_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019, IBM Research (Author: Michael Picheny) Adapted AMI recipe to MALACH Corpus
# Copyright 2013 Arnab Ghoshal, Pawel Swietojanski
diff --git a/egs/malach/s5/local/nnet3/prepare_lores_feats.sh b/egs/malach/s5/local/nnet3/prepare_lores_feats.sh
index 5601fcf7dd6..17822f0283d 100755
--- a/egs/malach/s5/local/nnet3/prepare_lores_feats.sh
+++ b/egs/malach/s5/local/nnet3/prepare_lores_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/malach/s5/local/nnet3/run_ivector_common.sh b/egs/malach/s5/local/nnet3/run_ivector_common.sh
index d841b8f50ef..e179f319815 100755
--- a/egs/malach/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/malach/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1a.sh b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
index 7205a3adcd0..392b9e6c819 100755
--- a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
+++ b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -x
diff --git a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1b.sh b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
index db01b3ecbf4..72fcd135a0a 100755
--- a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
+++ b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
index 278ee345d50..0a96b10b8a9 100755
--- a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
+++ b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/malach/s5/local/run_cleanup_segmentation.sh b/egs/malach/s5/local/run_cleanup_segmentation.sh
index c2c730f1a9e..778fe96d2e7 100755
--- a/egs/malach/s5/local/run_cleanup_segmentation.sh
+++ b/egs/malach/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 IBM (Michael Picheny) Adapted from AMI recipe for MALACH Corpus
# Copyright 2016 Vimal Manohar
diff --git a/egs/malach/s5/local/score.sh b/egs/malach/s5/local/score.sh
index 00cc0c0f1a6..e8859c5011c 100755
--- a/egs/malach/s5/local/score.sh
+++ b/egs/malach/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -x
diff --git a/egs/malach/s5/local/score_asclite.sh b/egs/malach/s5/local/score_asclite.sh
index a47e66581ad..2e5bb20f000 100755
--- a/egs/malach/s5/local/score_asclite.sh
+++ b/egs/malach/s5/local/score_asclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -x
diff --git a/egs/malach/s5/run.sh b/egs/malach/s5/run.sh
index b8961d4df3c..57be778bb3f 100755
--- a/egs/malach/s5/run.sh
+++ b/egs/malach/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/mandarin_bn_bc/s5/README b/egs/mandarin_bn_bc/s5/README
new file mode 100644
index 00000000000..8c5b111acb5
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/README
@@ -0,0 +1,34 @@
+This recipe contains the following corpora from LDC:
+
+Audio:
+ Gale phase 2/3/4
+ LDC2013S08
+ LDC2013S04
+ LDC2014S09
+ LDC2015S06
+ LDC2015S13
+ LDC2016S03
+ LDC2017S25
+
+ TDT 2/3/4
+ LDC2001S93
+ LDC2001S95
+ LDC2005S11
+
+Text:
+ Gale phase 2/3/4
+ LDC2013T20
+ LDC2013T08
+ LDC2014T28
+ LDC2015T09
+ LDC2015T25
+ LDC2016T12
+ LDC2017T18
+
+ TDT 2/3/4
+ LDC2001T57
+ LDC2001T58
+ LDC2005T16
+ Besides, it uses the Gigaword corpus (simplified Mandarin) for LM training and for expanding the dictionary:
+ Gigaword (xin: simplified, cna: traditional. Use only xin)
+ LDC2003T09
diff --git a/egs/mandarin_bn_bc/s5/RESULTS b/egs/mandarin_bn_bc/s5/RESULTS
new file mode 100644
index 00000000000..dcb541497e9
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/RESULTS
@@ -0,0 +1,15 @@
+# In the results below, "large_test" is the pruned 4-gram LM, which is used for
+# lattice generation.
+
+# Results with nnet3 tdnn+chain model
+# local/chain/run_tdnn.sh
+# (4-epoch training on speed-perturbed and volume-perturbed "cleaned" data and left-biphone model)
+# num_params=20.7 M
+%CER 8.83 [ 7901 / 89515, 929 ins, 1738 del, 5234 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_dev_large_test/cer_9_0.0
+%CER 9.03 [ 17749 / 196659, 3770 ins, 3988 del, 9991 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_eval_large_test/cer_10_0.0
+
+# Results with RNNLM rescoring of tdnn+chain model
+%CER 8.49 [ 7600 / 89515, 863 ins, 1805 del, 4932 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_dev_large_test_rnnlm_1a_nbest_rescore/cer_8_0.0
+%CER 8.47 [ 7585 / 89515, 783 ins, 2027 del, 4775 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_dev_large_test_rnnlm_1a_rescore/cer_9_0.0
+%CER 8.82 [ 17342 / 196659, 3891 ins, 3809 del, 9642 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_eval_large_test_rnnlm_1a_nbest_rescore/cer_8_0.0
+%CER 8.72 [ 17142 / 196659, 3876 ins, 3766 del, 9500 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_eval_large_test_rnnlm_1a_rescore/cer_9_0.0
diff --git a/egs/mandarin_bn_bc/s5/cmd.sh b/egs/mandarin_bn_bc/s5/cmd.sh
new file mode 100644
index 00000000000..b2f193f08ac
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/cmd.sh
@@ -0,0 +1,18 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl --mem 2G --config conf/queue.conf --allow-a09 false"
+export decode_cmd="queue.pl --mem 4G --config conf/queue.conf --allow-a09 false"
+export mkgraph_cmd="queue.pl --mem 8G --config conf/queue.conf --allow-a09 false"
+# the use of cuda_cmd is deprecated, but it's still used in this example
+# directory.
+export cuda_cmd="queue.pl --gpu 1 --config conf/queue.conf"
diff --git a/egs/mandarin_bn_bc/s5/conf/cmu2pinyin b/egs/mandarin_bn_bc/s5/conf/cmu2pinyin
new file mode 100644
index 00000000000..c02eb600fcc
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/cmu2pinyin
@@ -0,0 +1,39 @@
+AA A
+AE A
+AH A
+AO UO
+AW U
+AY AI
+B B
+CH CH
+D D
+DH S I
+EH AI
+ER E
+EY AI
+F F
+G G
+HH H
+IH I
+IY I
+JH ZH
+K K
+L L
+M M
+N N
+NG N
+OW UO
+OY UO
+P P
+R R
+S S
+SH SH
+T T
+TH S
+UH U
+UW U
+V W
+W W
+Y Y
+Z Z
+ZH X
diff --git a/egs/mandarin_bn_bc/s5/conf/decode.config b/egs/mandarin_bn_bc/s5/conf/decode.config
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/egs/mandarin_bn_bc/s5/conf/decode_dnn.config b/egs/mandarin_bn_bc/s5/conf/decode_dnn.config
new file mode 100644
index 00000000000..89dd9929a62
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/decode_dnn.config
@@ -0,0 +1,2 @@
+beam=18.0 # beam for decoding. Was 13.0 in the scripts.
+lattice_beam=10.0 # this has most effect on size of the lattices.
diff --git a/egs/mandarin_bn_bc/s5/conf/fbank.conf b/egs/mandarin_bn_bc/s5/conf/fbank.conf
new file mode 100644
index 00000000000..62f6dc83b48
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/fbank.conf
@@ -0,0 +1,3 @@
+# No non-default options for now.
+--sample-frequency=16000
+--num-mel-bins=30
diff --git a/egs/mandarin_bn_bc/s5/conf/mfcc.conf b/egs/mandarin_bn_bc/s5/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/mandarin_bn_bc/s5/conf/mfcc_hires.conf b/egs/mandarin_bn_bc/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..c8ad04dae66
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--sample-frequency=16000 # the data is sampled at 16kHz
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=40 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/mandarin_bn_bc/s5/conf/online_cmvn.conf b/egs/mandarin_bn_bc/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/mandarin_bn_bc/s5/conf/online_pitch.conf b/egs/mandarin_bn_bc/s5/conf/online_pitch.conf
new file mode 100644
index 00000000000..e959a19d5b8
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/online_pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/mandarin_bn_bc/s5/conf/pinyin2cmu b/egs/mandarin_bn_bc/s5/conf/pinyin2cmu
new file mode 100644
index 00000000000..a6e53620479
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/pinyin2cmu
@@ -0,0 +1,58 @@
+A AA
+AI AY
+AN AE N
+ANG AE NG
+AO AW
+B B
+CH CH
+C T S
+D D
+E ER
+EI EY
+EN AH N
+ENG AH NG
+ER AA R
+F F
+G G
+H HH
+IA IY AA
+IANG IY AE NG
+IAN IY AE N
+IAO IY AW
+IE IY EH
+I IY
+ING IY NG
+IN IY N
+IONG IY UH NG
+IU IY UH
+J J
+K K
+L L
+M M
+N N
+O AO
+ONG UH NG
+OU OW
+P P
+Q Q
+R R
+SH SH
+S S
+T T
+UAI UW AY
+UANG UW AE NG
+UAN UW AE N
+UA UW AA
+UI UW IY
+UN UW AH N
+UO UW AO
+U UW
+UE IY EH
+VE IY EH
+V IY UW
+VN IY N
+W W
+X X
+Y Y
+ZH JH
+Z Z
diff --git a/egs/mandarin_bn_bc/s5/conf/pitch.conf b/egs/mandarin_bn_bc/s5/conf/pitch.conf
new file mode 100644
index 00000000000..e959a19d5b8
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/mandarin_bn_bc/s5/conf/queue.conf b/egs/mandarin_bn_bc/s5/conf/queue.conf
new file mode 100644
index 00000000000..dfedb6424bf
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/queue.conf
@@ -0,0 +1,13 @@
+# Default configuration
+command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
+option mem=* -l mem_free=$0,ram_free=$0
+option mem=0 # Do not add anything to qsub_opts
+option num_threads=* -pe smp $0
+option num_threads=1 # Do not add anything to qsub_opts
+option max_jobs_run=* -tc $0
+default gpu=0
+option gpu=0 -q all.q
+option gpu=* -l gpu=$0 -q g.q
+default allow_a09=false
+option allow_a09=true
+option allow_a09=false -l 'hostname=!a09*&!a17*&!a13*&!a14*&!a10*&!a11*&!c16*'
diff --git a/egs/mandarin_bn_bc/s5/local/chain/run_chain_common.sh b/egs/mandarin_bn_bc/s5/local/chain/run_chain_common.sh
new file mode 100755
index 00000000000..85c8589dc78
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/chain/run_chain_common.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+
+# this script has common stages shared across librispeech chain recipes.
+# It generates a new topology in a new lang directory, gets the alignments as
+# lattices, and builds a tree for the new topology
+set -e
+
+stage=11
+
+# input directory names. These options are actually compulsory, and they have
+# been named for convenience
+gmm_dir=
+ali_dir=
+ali_nj=
+lores_train_data_dir=
+lang_original=
+num_leaves=6000
+
+# output directory names. They are also compulsory.
+lang=
+lang_original=
+lat_dir=
+tree_dir=
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+[ -z $lang ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1;
+[ -z $lat_dir ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1;
+[ -z $tree_dir ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1;
+
+for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do
+ [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
+done
+
+if [ $stage -le 11 ]; then
+ echo "$0: creating lang directory with one state per phone."
+ # Create a version of the lang/ directory that has one state per phone in the
+ # topo file. [note, it really has two states.. the first one is only repeated
+ # once, the second one has zero or more repeats.]
+ if [ -d $lang ]; then
+ if [ $lang/L.fst -nt $lang_original/L.fst ]; then
+ echo "$0: $lang already exists, not overwriting it; continuing"
+ else
+ echo "$0: $lang already exists and seems to be older than data/lang..."
+ echo " ... not sure what to do. Exiting."
+ exit 1;
+ fi
+ else
+ cp -r $lang_original $lang
+ silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
+ nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
+ # Use our special topology... note that later on may have to tune this
+ # topology.
+ steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
+ fi
+fi
+
+if [ $stage -le 12 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ nj=$(cat ${ali_dir}/num_jobs) || exit 1;
+ steps/align_fmllr_lats.sh --nj $ali_nj --cmd "$train_cmd" ${lores_train_data_dir} \
+ $lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 13 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" $num_leaves ${lores_train_data_dir} $lang $ali_dir $tree_dir
+fi
+
+exit 0;
diff --git a/egs/mandarin_bn_bc/s5/local/chain/run_tdnn.sh b/egs/mandarin_bn_bc/s5/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..e1adaa9346d
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1d.sh
\ No newline at end of file
diff --git a/egs/mandarin_bn_bc/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/mandarin_bn_bc/s5/local/chain/tuning/run_tdnn_1d.sh
new file mode 100755
index 00000000000..3c4f53eee7b
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/chain/tuning/run_tdnn_1d.sh
@@ -0,0 +1,205 @@
+#!/usr/bin/env bash
+set -e
+
+
+# configs for 'chain'
+stage=-1
+decode_nj=60
+ali_nj=80
+train_set=train_gale_tdt_cleanup
+gmm=tri6b_cleanup
+nnet3_affix=_cleanup
+lang_affix="_large_test"
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1d
+tree_affix=
+train_stage=-8
+get_egs_stage=-10
+decode_iter=
+
+# TDNN options
+frames_per_eg=150,110,100
+remove_egs=true
+common_egs_dir=
+xent_regularize=0.1
+dropout_schedule='0,0@0.20,0.5@0.50,0'
+
+test_online_decoding=true # if true, it will run the last decoding stage.
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+gmm_dir=exp/$gmm
+ali_dir=exp/${gmm}_sp_ali
+tree_dir=exp/chain${nnet3_affix}/tree_sp${tree_affix:+_$tree_affix}
+lang=data/lang${lang_affix}_chain
+lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_lats
+dir=exp/chain${nnet3_affix}/tdnn${affix:+_$affix}_sp
+train_data_dir=data/${train_set}_sp_hires
+lores_train_data_dir=data/${train_set}_sp
+train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_nopitch
+if ! cuda-compiled; then
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=$ivector_dim name=ivector
+ input dim=$feat_dim name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ linear-component name=prefinal-l dim=256 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+if [ $stage -le 15 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b{09,10,11,12}/$USER/kaldi-data/egs/mandarin-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/chain/train.py --stage $train_stage \
+ --cmd "$train_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient 0.1 \
+ --chain.l2-regularize 0.0 \
+ --chain.apply-deriv-weights false \
+ --chain.lm-opts="--num-extra-lm-states=2000" \
+ --egs.dir "$common_egs_dir" \
+ --egs.stage $get_egs_stage \
+ --egs.opts "--frames-overlap-per-eg 0 --constrained false" \
+ --egs.chunk-width $frames_per_eg \
+ --trainer.dropout-schedule $dropout_schedule \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.num-chunk-per-minibatch 64 \
+ --trainer.frames-per-iter 2500000 \
+ --trainer.num-epochs 4 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 16 \
+ --trainer.optimization.initial-effective-lrate 0.00015 \
+ --trainer.optimization.final-effective-lrate 0.000015 \
+ --trainer.max-param-change 2.0 \
+ --cleanup.remove-egs $remove_egs \
+ --feat-dir $train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir $lat_dir \
+ --dir $dir || exit 1;
+
+fi
+
+echo "Train chain tdnn succeeded !"
+graph_dir=$dir/graph${lang_affix}
+if [ $stage -le 16 ]; then
+ # Note: it might appear that this $lang directory is mismatched, and it is as
+ # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+ # the lang directory.
+ utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang${lang_affix} $dir $graph_dir
+ # remove <UNK> from the graph, and convert back to const-FST.
+ fstrmsymbols --apply-to-output=true --remove-arcs=true "echo 3|" $graph_dir/HCLG.fst - | \
+ fstconvert --fst_type=const > $graph_dir/temp.fst
+ mv $graph_dir/temp.fst $graph_dir/HCLG.fst
+fi
+
+echo "Decoding "
+iter_opts=
+for t in dev eval; do
+ ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${t}_hires_nopitch
+ steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
+ --nj $decode_nj --cmd "$decode_cmd" $iter_opts \
+ --online-ivector-dir "$ivector_dir" \
+ $graph_dir data/${t}_hires $dir/decode_${t}_large_test || exit 1
+done
+exit 0;
diff --git a/egs/mandarin_bn_bc/s5/local/check_oov_rate.sh b/egs/mandarin_bn_bc/s5/local/check_oov_rate.sh
new file mode 100644
index 00000000000..6c655205e31
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/check_oov_rate.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+# This script checks the Out Of Vocabulary words rate of given data set.
+
+if [ $# -ne 2 ]; then
+ echo "Usage: $0 <lexicon> <text-file>"
+ exit 1
+fi
+lex=$1
+fname=$2
+
+cat $fname | awk '{for(n=2;n<=NF;n++) { print $n; }}' | perl -e '
+ $lex = shift @ARGV; open(L, "<$lex")||die;
+ while(<L>){ @A=split; $seen{$A[0]}=1;}
+ while(<STDIN>) {
+ @A=split;
+ $word=$A[0];
+ $tot++;
+ if(defined $seen{$word}) {
+ $invoc++;
+ } else {print "OOV word $word\n";}
+ }
+ $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+ printf("Seen $invoc out of $tot tokens; OOV rate is %.2f\n", $oov_rate);
+ ' $lex
diff --git a/egs/mandarin_bn_bc/s5/local/create_oov_char_lexicon.pl b/egs/mandarin_bn_bc/s5/local/create_oov_char_lexicon.pl
new file mode 100755
index 00000000000..33e2e8061c3
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/create_oov_char_lexicon.pl
@@ -0,0 +1,48 @@
+#!/usr/bin/env perl
+# Copyright 2016 Alibaba Robotics Corp. (Author: Xingyu Na)
+#
+# A script for char-based Chinese OOV lexicon generation.
+#
+# Input 1: char-based dictionary, example
+# CHAR1 ph1 ph2
+# CHAR2 ph3
+# CHAR3 ph2 ph4
+#
+# Input 2: OOV word list, example
+# WORD1
+# WORD2
+# WORD3
+#
+# where WORD1 is in the format of "CHAR1CHAR2".
+#
+# Output: OOV lexicon, in the format of normal lexicon
+
+if($#ARGV != 1) {
+ print STDERR "usage: perl create_oov_char_lexicon.pl chardict oovwordlist > oovlex\n\n";
+ print STDERR "### chardict: a dict in which each line contains the pronunciation of one Chinese char\n";
+ print STDERR "### oovwordlist: OOV word list\n";
+ print STDERR "### oovlex: output OOV lexicon\n";
+ exit;
+}
+
+use utf8;
+my %prons;
+open(DICT, $ARGV[0]) || die("Can't open dict ".$ARGV[0]."\n");
+binmode(DICT,":encoding(utf8)");
+foreach (<DICT>) {
+ chomp; @A = split(" ", $_); $prons{$A[0]} = $A[1];
+}
+close DICT;
+
+open(WORDS, $ARGV[1]) || die("Can't open oov word list ".$ARGV[1]."\n");
+binmode(WORDS,":encoding(utf8)");
+while (<WORDS>) {
+ chomp;
+ print $_;
+ @A = split("", $_);
+ foreach (@A) {
+ print " $prons{$_}";
+ }
+ print "\n";
+}
+close WORDS;
diff --git a/egs/mandarin_bn_bc/s5/local/gale_bad_utts b/egs/mandarin_bn_bc/s5/local/gale_bad_utts
new file mode 100644
index 00000000000..2dd361f58a9
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_bad_utts
@@ -0,0 +1,100 @@
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20070308_040701
+CCTV2_ECONOMYANDLAW_CMN_20070426_202800
+CCTV2_ECONOMYANDLAW_CMN_20070426_202800(1)
+CCTV2_LIANGHUI_PROBLEM_20070308_213000
+CCTV4_TDYFOCUS_CMN_20070824_092801
+VOA_ISSUESANDOPINIONS_CMN_20070801_210500
+VOA_ISSUESANDOPINIONS_CMN_20070926_210500
+VOA_LISTENERSHOTLINE_CMN_20070906_223000
+VOA_LISTENERSHOTLINE_CMN_20070926_223000
+VOA_LISTENERSHOTLINE_CMN_20070927_223000
+PHOENIX_NEWSLINE_CMN_20070101_114800
+PHOENIX_NEWSLINE_CMN_20070101_114800(1)
+CCTV2_ECONOMYANDLAW_CMN_20070426_202800(1)
+CCTV2_LIANGHUI_PROBLEM_20070308_213000
+CCTV4_TDYFOCUS_CMN_20070824_092801
+CCTV4_TDYFOCUS_CMN_20071004_092800
+CCTV4_TDYFOCUS_CMN_20071008_092801
+CCTV4_TDYFOCUS_CMN_20071012_092801
+CCTVNEWS_LIANGHUIZHICHUANG_PROBLEM_20070309_085702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070403_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070404_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070405_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070409_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070703_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070704_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070705_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070706_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070712_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070713_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070716_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070717_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070718_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070719_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070810_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070813_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070814_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070815_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070816_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070817_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070820_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070821_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070822_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070823_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070824_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070827_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070828_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070829_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070830_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070905_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070906_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070907_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070910_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070911_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070912_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070913_215701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070401_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070408_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070708_140702
+CCTVNEWS_TELLITLIKEITIS_CMN_20070715_140702
+CCTVNEWS_TELLITLIKEITIS_CMN_20070812_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070819_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070826_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070902_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070909_140701
+HUBEI_COMMUNICATE_CMN_20070401_230202
+HUBEI_COMMUNICATE_CMN_20070408_225927
+HUBEI_COMMUNICATE_CMN_20070701_222922
+HUBEI_COMMUNICATE_CMN_20070708_222931
+HUBEI_COMMUNICATE_CMN_20070715_222707
+HUBEI_COMMUNICATE_CMN_20070826_223006
+HUBEI_COMMUNICATE_CMN_20070902_223950
+HUBEI_COMMUNICATE_CMN_20070909_222959
+PHOENIX_ASIANJRNL_CMN_20070102_075800(1)
+PHOENIX_ASIANJRNL_CMN_20070103_075800(1)
+PHOENIX_ASIANJRNL_CMN_20070104_075800(1)
+PHOENIX_ASIANJRNL_CMN_20070108_075800(1)
+PHOENIX_ASIANJRNL_CMN_20070109_075800(1)
+PHOENIX_ASIANJRNL_CMN_20080217_085801
+PHOENIX_ASIANJRNL_CMN_20080224_085801
+PHOENIX_ASIANJRNL_CMN_20080311_085801
+PHOENIX_BEHINDHL_CMN_20080227_082800
+PHOENIX_BEHINDHL_CMN_20080306_082801
+PHOENIX_BEHINDHL_CMN_20080308_082801
+PHOENIX_DATELUYU_CMN_20080201_142801
+PHOENIX_NEWSHACK_CMN_20070407_212300
+PHOENIX_NEWSLINE_CMN_20070101_114800(1)
+PHOENIX_NEWSLINE_CMN_20080306_114801
+PHOENIX_SOCWATCH_CMN_20070802_225801
+PHOENIX_SOCWATCH_CMN_20070816_225801
+PHOENIX_SOCWATCH_CMN_20070823_225801
+PHOENIX_SOCWATCH_CMN_20070906_225800
+PHOENIX_SOCWATCH_CMN_20070913_225801
+PHOENIX_SOCWATCH_CMN_20080117_225800
+PHOENIX_SOCWATCH_CMN_20080131_225800
+PHOENIX_SOCWATCH_CMN_20080214_225801
+VOA_ISSUESANDOPINIONS_CMN_20070801_210500
+VOA_ISSUESANDOPINIONS_CMN_20070926_210500
+VOA_LISTENERSHOTLINE_CMN_20070906_223000
+VOA_LISTENERSHOTLINE_CMN_20070926_223000
+VOA_LISTENERSHOTLINE_CMN_20070927_223000
diff --git a/egs/mandarin_bn_bc/s5/local/gale_data_prep_audio.sh b/egs/mandarin_bn_bc/s5/local/gale_data_prep_audio.sh
new file mode 100755
index 00000000000..0ea6cfcf9f9
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_data_prep_audio.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Copyright 2016 Johns Hopkins University (author: Jan "Yenda" Trmal)
+# Apache 2.0
+
+
+echo $0 "$@"
+
+galeData=$(utils/make_absolute.sh "${@: -1}" );
+wavedir=$galeData/wav
+mkdir -p $wavedir
+
+
+length=$(($#-1))
+args=${@:1:$length}
+
+# check that sox is installed
+which sox &>/dev/null
+if [[ $? != 0 ]]; then
+ echo "$0: sox is not installed"
+ exit 1
+fi
+
+set -e -o pipefail
+
+for var in $args; do
+ CD=$(basename $var)
+ [ -d $wavedir/$CD ] && rm -rf $wavedir/$CD
+ mkdir -p $wavedir/$CD
+ find $var -type f -name *.wav | while read file; do
+ f=$(basename $file)
+ if [[ ! -L "$wavedir/$CD/$f" ]]; then
+ ln -sf $file $wavedir/$CD/$f
+ fi
+ done
+
+ # make a flac symlink as well
+ find $var -type f -name *.flac | while read file; do
+ f=$(basename $file)
+
+ if [[ ! -L "$wavedir/$CD/$f" ]]; then
+ ln -sf $file $wavedir/$CD/$f
+ fi
+ done
+done
+
+#figure out the proper sox command line
+#the flac will be converted on the fly
+(
+ for w in `find $wavedir -name *.wav` ; do
+ base=`basename $w .wav`
+ fullpath=`utils/make_absolute.sh $w`
+ echo "$base sox $fullpath -r 16000 -t wav - |"
+ done
+
+ for w in `find $wavedir -name *.flac` ; do
+ base=`basename $w .flac`
+ fullpath=`utils/make_absolute.sh $w`
+ echo "$base sox $fullpath -r 16000 -t wav - |"
+ done
+) | sort -u > $galeData/wav.scp
+
+#clean
+rm -fr $galeData/id$$ $galeData/wav$$
+echo "$0: data prep audio succeeded"
+
+exit 0
+
diff --git a/egs/mandarin_bn_bc/s5/local/gale_data_prep_split.sh b/egs/mandarin_bn_bc/s5/local/gale_data_prep_split.sh
new file mode 100755
index 00000000000..b580bb5b76e
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_data_prep_split.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 (author: Ahmed Ali, Hainan Xu)
+# Copyright 2016 Johns Hopkins University (author: Jan "Yenda" Trmal)
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+if [ $# -ne 2 ]; then
+ echo "Arguments should be the <gale-data-dir> <local-dir>"; exit 1
+fi
+
+set -e -o pipefail
+# data will be in data/local
+mkdir -p $2
+galeData=$(utils/make_absolute.sh $1)
+dir=$(utils/make_absolute.sh $2)
+
+
+# some problem with the text data; same utt id but different transcription
+cat $galeData/all | awk '{print$2}' | \
+ sort | uniq -c | awk '{if($1!="1")print$2}' > $galeData/dup.list
+
+# same time duration but different transcription (multiple speaker speaks at same time)
+cat $galeData/all | awk '{print $1" "$3" "$4}' | \
+ sort | uniq -c | awk '{if($1!="1")print $2" "$3" "$4}' > $galeData/dup.segment
+awk 'NR==FNR{a[$1$2$3];next} $1$3$4 in a {print $2}' $galeData/dup.segment $galeData/all >> $galeData/dup.list
+
+utils/filter_scp.pl --exclude -f 2 \
+ $galeData/dup.list $galeData/all > $galeData/all.nodup
+
+mv $galeData/all $galeData/all.orig
+mv $galeData/all.nodup $galeData/all
+
+diff <(awk '{print $1}' $galeData/all | sort | uniq) \
+ <(awk '{print $1}' $galeData/wav.scp | sort | uniq) |\
+ grep '>\|<' | cut -d " " -f2- > $galeData/bad_utts
+grep -f <(cat local/gale_dev/test.LDC*) $galeData/all | grep -v -F -f $galeData/bad_utts > $galeData/all.dev
+
+grep -f <(cat local/gale_eval/test.LDC*) $galeData/all | grep -v -F -f $galeData/bad_utts > $galeData/all.eval
+
+# Only parts of the eval transcriptions will be used. We select them from the given segmentation information
+mv $galeData/all.eval $galeData/all.eval.tmp
+cat local/gale_eval/test.*.segment > $galeData/eval.segments.dur
+awk 'NR==FNR{a[$1$2$3];next} $1$3$4 in a {print $0}' $galeData/eval.segments.dur $galeData/all.eval.tmp \
+ > $galeData/all.eval
+rm $galeData/all.eval.tmp
+
+grep -v -f <(cat local/gale_dev/test.LDC*) $galeData/all |\
+ grep -v -f <(cat local/gale_eval/test.LDC*) |\
+ grep -v -F -f $galeData/bad_utts > $galeData/all.train
+
+cat $galeData/all.dev | awk '{print$2}' > $galeData/dev_utt_list
+cat $galeData/all.eval | awk '{print$2}' > $galeData/eval_utt_list
+cat $galeData/all.train | awk '{print$2}' > $galeData/train_utt_list
+
+mkdir -p $dir/dev
+mkdir -p $dir/eval
+mkdir -p $dir/train
+utils/filter_scp.pl -f 1 $galeData/dev_utt_list $galeData/utt2spk > $dir/dev/utt2spk
+utils/utt2spk_to_spk2utt.pl $dir/dev/utt2spk | sort -u > $dir/dev/spk2utt
+
+utils/filter_scp.pl -f 1 $galeData/eval_utt_list $galeData/utt2spk > $dir/eval/utt2spk
+utils/utt2spk_to_spk2utt.pl $dir/eval/utt2spk | sort -u > $dir/eval/spk2utt
+
+utils/filter_scp.pl -f 1 $galeData/train_utt_list $galeData/utt2spk > $dir/train/utt2spk
+utils/utt2spk_to_spk2utt.pl $dir/train/utt2spk | sort -u > $dir/train/spk2utt
+
+for x in dev eval train; do
+ outdir=$dir/$x
+ file=$galeData/all.$x
+ mkdir -p $outdir
+ awk '{print $2 " " $1 " " $3 " " $4}' $file | sort -u > $outdir/segments
+ awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $file | sort -u > $outdir/text
+done
+
+cat $dir/dev/segments | awk '{print$2}' | sort -u > $galeData/dev.wav.list
+cat $dir/eval/segments | awk '{print$2}' | sort -u > $galeData/eval.wav.list
+cat $dir/train/segments | awk '{print$2}' | sort -u > $galeData/train.wav.list
+
+utils/filter_scp.pl -f 1 $galeData/dev.wav.list $galeData/wav.scp > $dir/dev/wav.scp
+utils/filter_scp.pl -f 1 $galeData/eval.wav.list $galeData/wav.scp > $dir/eval/wav.scp
+utils/filter_scp.pl -f 1 $galeData/train.wav.list $galeData/wav.scp > $dir/train/wav.scp
+
+cat $galeData/wav.scp | awk -v seg=$dir/train/segments 'BEGIN{while((getline < seg) > 0) {seen[$2]=1;}}
+ {if (seen[$1]) { print $0}}' > $dir/train/wav.scp
+
+
+echo Gale data prep split succeeded
diff --git a/egs/mandarin_bn_bc/s5/local/gale_data_prep_txt.sh b/egs/mandarin_bn_bc/s5/local/gale_data_prep_txt.sh
new file mode 100755
index 00000000000..53d391f88a6
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_data_prep_txt.sh
@@ -0,0 +1,126 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 (author: Ahmed Ali, Hainan Xu)
+# Copyright 2016 Johns Hopkins University (author: Jan "Yenda" Trmal)
+# Apache 2.0
+
+echo $0 "$@"
+export LC_ALL=C
+
+galeData=$(utils/make_absolute.sh "${@: -1}" );
+
+length=$(($#-1))
+args=${@:1:$length}
+
+top_pwd=`pwd`
+txtdir=$galeData/txt
+mkdir -p $txtdir
+
+cd $txtdir
+
+for cdx in ${args[@]}; do
+ echo "Preparing $cdx"
+ if [[ $cdx == *.tgz ]] ; then
+ tar -xvf $cdx
+ elif [ -d "$cdx" ]; then
+ tgt=$(basename $cdx)
+ test -x $tgt || ln -s $cdx `basename $tgt`
+ else
+ echo "I don't really know what I shall do with $cdx " >&2
+ fi
+done
+
+find -L . -type f -name *.tdf | while read file; do
+sed '1,3d' $file
+done > all.tmp
+
+perl -e '
+ ($inFile,$idFile,$txtFile,$spk,$mapf)= split /\s+/, $ARGV[0];
+ open(IN, "$inFile");
+ open(ID, ">$idFile");
+ open(TXT, ">$txtFile");
+ open(SPK, ">$spk");
+ open(MAP, ">$mapf");
+ while (<IN>) {
+ @arr= split /\t/,$_;
+ $arr[4] =~ s/ //g;
+ $arr[4] = sprintf("%020s", $arr[4]);
+ $spkid = "$arr[0]_$arr[4]";
+ $spkfix = sprintf("%080s", $spkid);
+
+ $start=sprintf ("%0.3f",$arr[2]);
+ $rStart=$start;
+ $start=~s/\.//;
+ $start=~s/^0+$/0/;
+ $start=~s/^0+([^0])/$1/; # remove zeros at the beginning
+ $start = sprintf("%09s", $start);
+
+ $end=sprintf ("%0.3f",$arr[3]);
+ $rEnd=$end;
+ $end=~s/^0+([^0])/$1/;
+ $end=~s/\.//;
+ $end = sprintf("%09s", $end);
+
+ $id="$arr[11] $arr[0] ${spkfix}_$arr[0]_${start}_${end} $rStart $rEnd\n";
+ next if ($rStart == $rEnd);
+ $id =~ s/.sph//g;
+ print ID $id;
+ print TXT "$arr[7]\n";
+ print SPK "${spkfix}_$arr[0]_${start}_${end} ${spkfix}\n";
+ print MAP "$arr[0] ${spkfix}_$arr[0]\n";
+ }' "all.tmp allid.tmp contentall.tmp utt2spk.tmp map.tmp"
+
+perl -p -i -e 's=/.$==g' contentall.tmp
+
+cd $top_pwd
+
+
+pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'`
+export PYTHONPATH=$PYTHONPATH:`pwd`/tools/mmseg-1.3.0/lib/python${pyver}/site-packages
+if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "--- Downloading mmseg-1.3.0 ..."
+ echo "NOTE: it assumes that you have Python, Setuptools installed on your system!"
+ wget -P tools http://pypi.python.org/packages/source/m/mmseg/mmseg-1.3.0.tar.gz
+ tar xf tools/mmseg-1.3.0.tar.gz -C tools
+ cd tools/mmseg-1.3.0
+ mkdir -p lib/python${pyver}/site-packages
+ CC=gcc CXX=g++ python setup.py build
+ python setup.py install --prefix=.
+ cd ../..
+ if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "mmseg is not found - installation failed?"
+ exit 1
+ fi
+fi
+
+cat $txtdir/contentall.tmp |\
+ sed -e 's/,//g' |\
+ sed -e 's/<foreign language=\"[a-zA-Z]*\">/ /g' |\
+ sed -e 's/<foreign language=[a-zA-Z]*>/ /g' |\
+ sed -e 's/<\/foreign>/ /g' |\
+ perl -pe 's/<[^>]+>/ /g' |\
+ sed -e 's/\[NS\]//g' |\
+ sed -e 's/\[ns\]//g' |\
+ sed -e 's/<noise>\(.\+\)<\/noise>/\1/g' |\
+ sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\
+ perl local/mandarin_text_normalize.pl | \
+ python local/mandarin_segment.py > $txtdir/text || exit 1;
+
+paste $txtdir/allid.tmp $txtdir/text | sed 's: $::' | awk '{if (NF>5) {print
+$0}}' > $txtdir/all_1.tmp
+
+awk '{print $3}' $txtdir/all_1.tmp > $txtdir/uttid
+cut -d " " -f6- $txtdir/all_1.tmp > $txtdir/text
+
+awk '{$1="";print $0}' $txtdir/all_1.tmp | sed 's:^ ::' > $txtdir/../all
+
+cat $txtdir/utt2spk.tmp | awk 'NR==FNR{a[$1];next} $1 in a{print $0}' $txtdir/uttid - |\
+ sort -u > $txtdir/../utt2spk
+cat $txtdir/map.tmp | awk 'NR==FNR{a[$1];next} $2 in a{print $0}' $txtdir/uttid -|\
+ sort -u > $txtdir/../map
+
+sort -c $txtdir/../utt2spk
+
+utils/utt2spk_to_spk2utt.pl $txtdir/../utt2spk | sort -u > $txtdir/../spk2utt
+
+echo "Gale data prep text succeeded !"
diff --git a/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S04 b/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S04
new file mode 100644
index 00000000000..92f69180735
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S04
@@ -0,0 +1,7 @@
+CCTV4_ACROSSSTRAIT_CMN_20070108_073033
+PHOENIX_NEWSLINE_CMN_20070101_114800
+CCTV4_TDYFOCUS_CMN_20070111_082801
+CCTV2_ECONOMYANDLAW_CMN_20070126_203005
+PHOENIX_BEHINDHL_CMN_20061004_052800
+PHOENIX_NEWSHACK_CMN_20060923_212301
+PHOENIX_NEWSLINE_CMN_20070102_114800
diff --git a/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S08 b/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S08
new file mode 100644
index 00000000000..75868edcf85
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S08
@@ -0,0 +1,7 @@
+CCTV4_DAILYNEWS_CMN_20061023_135801
+CCTV4_DAILYNEWS_CMN_20060923_135800
+PHOENIX_PHNXWRLD_CMN_20070101_111800
+CCTV4_NEWS3_CMN_20060921_085800
+CCTV7_MILITARYNEWS1_CMN_20070102_193006
+PHOENIX_PHNXWRLD_CMN_20061024_112500
+CCTV7_MILITARYNEWS1_CMN_20070113_193011
diff --git a/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03 b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03
new file mode 100644
index 00000000000..f7506dcb290
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03
@@ -0,0 +1,109 @@
+BEIJING_TWOWAYLANES_CMN_20080322_130507
+BEIJING_TWOWAYLANES_CMN_20080412_130002
+CCTV1_LEGALREPORT_CMN_20080311_123601
+CCTV1_LEGALREPORT_CMN_20080312_123601
+CCTV1_LEGALREPORT_CMN_20080319_123601
+CCTV1_LEGALREPORT_CMN_20080320_123601
+CCTV1_LEGALREPORT_CMN_20080321_123602
+CCTV1_LEGALREPORT_CMN_20080323_123601
+CCTV1_LEGALREPORT_CMN_20080324_123601
+CCTV1_LEGALREPORT_CMN_20080325_123601
+CCTV1_LEGALREPORT_CMN_20080326_123601
+CCTV1_LEGALREPORT_CMN_20080327_123801
+CCTV1_LEGALREPORT_CMN_20080328_123802
+CCTV1_LEGALREPORT_CMN_20080329_123802
+CCTV1_LEGALREPORT_CMN_20080330_123801
+CCTV1_LEGALREPORT_CMN_20080407_123801
+CCTV1_LEGALREPORT_CMN_20080408_123801
+CCTV1_LEGALREPORT_CMN_20080410_123801
+CCTV1_LEGALREPORT_CMN_20080422_123801
+CCTV2_ACROSSSTRAIT_CMN_20080312_073000
+CCTV2_BUSINESSHOUR_CMN_20080326_220802
+CCTV2_DIALOG_CMN_20080309_222803
+CCTV2_DIALOG_CMN_20080316_214834
+CCTV2_DIALOG_CMN_20080323_220801
+CCTV2_DIALOG_CMN_20080330_220803
+CCTV2_DIALOG_CMN_20080413_220801
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802
+CCTV2_ECONOMYANDLAW_CMN_20080322_202802
+CCTV2_ECONOMYANDLAW_CMN_20080324_202802
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802
+CCTV2_ECONOMYANDLAW_CMN_20080326_203035
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815
+CCTV2_ECONOMYANDLAW_CMN_20080401_202802
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000
+CCTV4_ACROSSSTRAIT_CMN_20080322_073002
+CCTV4_ACROSSSTRAIT_CMN_20080323_073002
+CCTV4_ACROSSSTRAIT_CMN_20080324_073002
+CCTV4_ACROSSSTRAIT_CMN_20080325_073002
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002
+CCTV4_ACROSSSTRAIT_CMN_20080328_073002
+CCTV4_ACROSSSTRAIT_CMN_20080329_073002
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002
+CCTV4_ACROSSSTRAIT_CMN_20080402_073002
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002
+CCTV4_ACROSSSTRAIT_CMN_20080425_073002
+CCTV4_ACROSSSTRAIT_CMN_20080426_073002
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201
+CCTVNEWS_PEOPLEINNEWS_CMN_20080324_202401
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701
+CCTVNEWS_PEOPLEINNEWS_CMN_20080422_202701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701
+HUBEI_COMMUNICATE_CMN_20080330_230009
+VOA_FOCUSDIALOGUE_CMN_20080405_210500
+VOA_FOCUSDIALOGUE_CMN_20080406_160500
+VOA_FOCUSDIALOGUE_CMN_20080414_160500
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500
+VOA_LISTENERSHOTLINE_CMN_20080403_223000
+VOA_LISTENERSHOTLINE_CMN_20080404_223000
+VOA_LISTENERSHOTLINE_CMN_20080412_223000
+VOA_LISTENERSHOTLINE_CMN_20080418_223000
+VOA_LISTENERSHOTLINE_CMN_20080423_223000
+VOA_STRAITSTALK_CMN_20080407_210500
+VOA_STRAITSTALK_CMN_20080414_210500
diff --git a/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03.segment b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03.segment
new file mode 100644
index 00000000000..3ef21919d23
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03.segment
@@ -0,0 +1,1504 @@
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 175.122 186.816
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 268.681 274.885
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 268.681 274.885
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 316.040 323.775
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 333.528 336.982
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 340.838 346.705
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 387.065 393.495
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 424.507 427.475
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 509.798 522.713
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 834.360 842.314
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2419.176 2425.717
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2790.157 2802.478
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2887.429 2895.022
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2895.022 2896.194
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2923.216 2926.875
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 3017.943 3020.333
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 3095.726 3098.319
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 501.872 516.874
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 550.503 582.685
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 709.541 718.738
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 823.793 833.518
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 976.397 1003.134
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1048.942 1063.660
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1082.157 1092.840
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1092.840 1113.463
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1156.777 1168.181
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1221.170 1239.972
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1399.487 1439.237
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1657.955 1677.021
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 356.875 366.830
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 436.514 447.247
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 741.081 756.331
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 1015.560 1024.623
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 1024.623 1027.060
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 184.594 188.331
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 243.090 252.093
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 452.760 464.257
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 521.680 534.700
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 870.502 881.245
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 436.477 450.154
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 521.177 529.219
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 592.976 609.487
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 609.487 618.415
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 624.097 632.426
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 1294.040 1298.243
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 1446.220 1450.297
+CCTV1_LEGALREPORT_CMN_20080329_123802 209.918 221.517
+CCTV1_LEGALREPORT_CMN_20080329_123802 331.055 344.508
+CCTV1_LEGALREPORT_CMN_20080329_123802 363.914 371.628
+CCTV1_LEGALREPORT_CMN_20080329_123802 385.517 398.432
+CCTV1_LEGALREPORT_CMN_20080329_123802 605.849 611.269
+CCTV1_LEGALREPORT_CMN_20080329_123802 667.416 692.940
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 322.505 335.743
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 371.421 386.885
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 386.885 396.340
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 460.052 478.842
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 500.649 504.018
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 726.393 737.829
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 797.180 806.942
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 806.942 819.195
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 835.576 849.122
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 853.810 862.075
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 920.349 932.207
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 932.207 949.097
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 932.207 949.097
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1224.115 1232.158
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1318.505 1336.569
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1336.569 1349.707
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1349.707 1358.084
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1414.359 1418.694
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1443.803 1452.569
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1452.569 1470.357
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1507.575 1518.723
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802 230.150 244.815
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802 968.564 976.612
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802 1027.831 1040.143
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802 1373.596 1385.986
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 1213.176 1244.700
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 1350.627 1370.834
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 1479.523 1491.229
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 1818.545 1837.272
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2073.163 2088.414
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2225.131 2256.343
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2431.393 2452.251
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2569.068 2590.218
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2617.970 2633.241
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2691.662 2716.147
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2810.594 2828.706
+CCTV1_LEGALREPORT_CMN_20080326_123601 370.283 378.662
+CCTV1_LEGALREPORT_CMN_20080326_123601 378.662 385.080
+CCTV1_LEGALREPORT_CMN_20080326_123601 525.547 536.532
+CCTV1_LEGALREPORT_CMN_20080326_123601 683.887 691.825
+CCTV1_LEGALREPORT_CMN_20080326_123601 703.684 712.489
+CCTV1_LEGALREPORT_CMN_20080326_123601 712.489 714.363
+CCTV1_LEGALREPORT_CMN_20080326_123601 714.363 719.825
+CCTV1_LEGALREPORT_CMN_20080326_123601 1094.555 1102.118
+CCTV1_LEGALREPORT_CMN_20080325_123601 348.364 354.020
+CCTV1_LEGALREPORT_CMN_20080325_123601 381.976 386.163
+CCTV1_LEGALREPORT_CMN_20080325_123601 386.163 403.078
+CCTV1_LEGALREPORT_CMN_20080325_123601 519.053 529.472
+CCTV1_LEGALREPORT_CMN_20080325_123601 706.516 711.306
+CCTV1_LEGALREPORT_CMN_20080325_123601 730.473 743.987
+CCTV1_LEGALREPORT_CMN_20080325_123601 813.758 824.057
+CCTV1_LEGALREPORT_CMN_20080325_123601 896.534 907.227
+CCTV1_LEGALREPORT_CMN_20080325_123601 961.640 974.947
+CCTV1_LEGALREPORT_CMN_20080325_123601 1224.469 1237.017
+CCTV1_LEGALREPORT_CMN_20080330_123801 177.772 186.515
+CCTV1_LEGALREPORT_CMN_20080330_123801 388.078 394.066
+CCTV1_LEGALREPORT_CMN_20080330_123801 676.982 691.620
+CCTV1_LEGALREPORT_CMN_20080330_123801 676.982 691.620
+CCTV1_LEGALREPORT_CMN_20080330_123801 691.620 701.021
+CCTV1_LEGALREPORT_CMN_20080330_123801 709.371 729.012
+CCTV1_LEGALREPORT_CMN_20080330_123801 786.881 807.597
+CCTV1_LEGALREPORT_CMN_20080330_123801 858.310 871.889
+CCTV1_LEGALREPORT_CMN_20080330_123801 958.857 965.929
+CCTV1_LEGALREPORT_CMN_20080330_123801 965.929 974.725
+BEIJING_TWOWAYLANES_CMN_20080322_130507 189.791 200.072
+BEIJING_TWOWAYLANES_CMN_20080322_130507 515.508 517.368
+BEIJING_TWOWAYLANES_CMN_20080322_130507 848.607 853.279
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1284.486 1295.045
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1502.737 1506.305
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1545.449 1553.996
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1909.202 1913.634
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1932.385 1933.977
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1950.594 1955.406
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1977.556 1988.722
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1993.191 2004.354
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2004.354 2008.436
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2028.425 2045.672
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2045.672 2050.609
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2061.847 2069.863
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2069.863 2076.926
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2076.926 2084.004
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2084.004 2091.457
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2118.061 2127.717
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2310.792 2315.495
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2337.853 2340.386
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2418.575 2424.775
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2433.823 2451.474
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2509.194 2515.251
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2596.556 2600.540
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2818.236 2827.875
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2830.328 2833.359
+CCTV4_ACROSSSTRAIT_CMN_20080325_073002 1303.243 1313.932
+CCTV4_ACROSSSTRAIT_CMN_20080325_073002 1313.932 1327.431
+CCTV4_ACROSSSTRAIT_CMN_20080325_073002 1370.507 1384.105
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002 476.312 483.187
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002 1151.028 1157.778
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002 1563.053 1575.412
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002 1613.240 1615.584
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 390.532 406.190
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 1452.594 1465.941
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 1736.290 1745.379
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 1745.379 1753.239
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 1914.223 1939.397
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2022.052 2043.373
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2043.373 2059.456
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2290.993 2312.234
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2326.150 2342.674
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2393.123 2409.854
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2409.854 2433.531
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2433.531 2446.850
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2446.850 2464.390
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2464.390 2483.911
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2464.390 2483.911
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2483.911 2492.621
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2492.621 2521.981
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2599.502 2617.167
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2736.103 2763.048
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2875.835 2905.697
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2905.697 2922.428
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2922.428 2941.697
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1268.022 1292.130
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1521.012 1531.695
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1734.963 1743.161
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1791.739 1808.611
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1926.230 1950.898
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1977.120 1986.478
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1986.478 2005.264
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2600.275 2619.773
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2663.954 2685.162
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2685.162 2697.102
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2819.708 2836.259
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2849.786 2859.241
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 3287.512 3301.847
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 451.597 464.083
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 884.562 896.566
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 954.839 965.200
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 1118.568 1133.694
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 1562.402 1578.469
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 242.313 251.167
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 374.326 383.123
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 849.574 865.909
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 985.771 1002.684
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 1510.644 1520.045
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 2023.706 2044.991
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 2438.502 2447.484
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 2532.259 2562.747
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 2858.064 2868.390
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 3622.217 3658.111
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 3905.538 3913.397
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4583.963 4593.681
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4716.259 4730.538
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4773.794 4792.400
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4792.400 4802.263
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4848.270 4866.075
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5012.069 5021.111
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5402.471 5420.594
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5608.358 5622.041
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5622.041 5634.466
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5684.122 5690.419
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5845.904 5854.806
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5923.051 5938.561
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6253.688 6269.497
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6540.433 6562.602
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6633.971 6647.208
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6735.663 6749.122
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6828.984 6850.257
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6850.257 6864.801
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6864.801 6881.641
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6935.763 6958.069
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6958.069 6979.206
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7121.465 7136.116
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7316.780 7336.376
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7336.376 7351.460
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7362.747 7375.791
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7454.403 7470.365
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7470.365 7482.534
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7482.534 7492.815
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7545.048 7552.965
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7817.409 7828.917
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7817.409 7828.917
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 813.841 823.998
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1110.610 1130.382
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1342.236 1347.360
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1506.751 1512.111
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1600.563 1605.783
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1631.606 1633.496
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1636.120 1638.370
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1722.302 1724.240
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1762.947 1772.695
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 2180.772 2181.803
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 3018.694 3024.146
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 3049.587 3056.446
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 3342.701 3347.139
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 112.562 118.937
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 396.743 407.196
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 455.434 456.887
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 690.132 694.366
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 716.672 745.827
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 1791.892 1802.500
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002 261.849 269.722
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002 269.722 275.255
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002 388.273 403.194
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002 1328.176 1333.800
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 386.764 390.763
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 1219.366 1224.765
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 1700.658 1706.846
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 1706.846 1710.734
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 1995.640 2006.396
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2013.664 2021.148
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2048.784 2051.480
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2055.567 2064.911
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2127.521 2130.131
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2155.458 2157.430
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2167.002 2169.581
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2177.838 2180.921
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2244.506 2248.865
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2258.896 2261.897
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2320.942 2324.067
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2524.349 2533.138
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2570.895 2580.961
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2670.623 2676.713
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2715.575 2723.482
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2723.482 2735.437
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2744.936 2763.378
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2770.861 2790.642
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2790.642 2801.001
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2958.823 2973.141
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 3145.175 3149.330
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4320.758 4328.274
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4331.322 4338.416
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4425.932 4434.973
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4434.973 4443.240
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4555.869 4564.317
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 5923.558 5930.710
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 5935.203 5944.050
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 5978.989 5990.241
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 6005.959 6016.216
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 6386.011 6394.027
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 6906.547 6930.592
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 7682.765 7695.852
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 7746.172 7765.267
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 8117.121 8135.107
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 8139.571 8143.124
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 8870.212 8913.647
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 9171.797 9181.172
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 9587.497 9588.599
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 9947.700 9966.294
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 10137.827 10144.843
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 10224.900 10234.801
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 10506.088 10524.616
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11102.139 11117.127
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11166.248 11185.279
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11478.559 11484.372
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11674.677 11677.802
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11791.333 11811.611
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11913.901 11927.976
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 12043.723 12053.566
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 12053.566 12072.666
+CCTVNEWS_PEOPLEINNEWS_CMN_20080422_202701 1545.087 1576.993
+CCTV4_ACROSSSTRAIT_CMN_20080402_073002 195.748 214.724
+CCTV4_ACROSSSTRAIT_CMN_20080402_073002 1316.568 1322.112
+CCTV4_ACROSSSTRAIT_CMN_20080402_073002 1434.190 1437.221
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002 289.797 295.688
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002 333.937 340.312
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002 729.948 739.229
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002 818.680 828.212
+CCTV2_ECONOMYANDLAW_CMN_20080326_203035 678.014 696.608
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 193.294 214.275
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 1171.570 1192.304
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 1217.656 1225.168
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 1284.391 1300.479
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 1421.296 1434.660
+VOA_LISTENERSHOTLINE_CMN_20080412_223000 1373.501 1382.689
+VOA_LISTENERSHOTLINE_CMN_20080412_223000 1422.296 1436.219
+VOA_LISTENERSHOTLINE_CMN_20080412_223000 1700.244 1707.820
+CCTV1_LEGALREPORT_CMN_20080320_123601 431.623 444.016
+CCTV1_LEGALREPORT_CMN_20080320_123601 453.717 460.124
+CCTV1_LEGALREPORT_CMN_20080320_123601 558.567 573.807
+CCTV1_LEGALREPORT_CMN_20080320_123601 616.918 619.132
+CCTV1_LEGALREPORT_CMN_20080320_123601 619.132 629.042
+CCTV1_LEGALREPORT_CMN_20080320_123601 756.415 765.851
+CCTV1_LEGALREPORT_CMN_20080320_123601 1159.064 1170.441
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 161.218 177.642
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 217.235 228.614
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 234.552 243.484
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 281.512 297.070
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 445.770 455.248
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 463.595 495.687
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 524.817 531.863
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 806.934 819.015
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 945.741 959.398
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 1094.420 1102.763
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 1182.793 1201.808
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 1344.563 1358.234
+CCTV4_ACROSSSTRAIT_CMN_20080329_073002 109.647 123.693
+CCTV4_ACROSSSTRAIT_CMN_20080329_073002 305.320 315.380
+CCTV4_ACROSSSTRAIT_CMN_20080329_073002 505.101 518.634
+CCTV1_LEGALREPORT_CMN_20080328_123802 237.445 258.287
+CCTV1_LEGALREPORT_CMN_20080328_123802 289.455 300.247
+CCTV1_LEGALREPORT_CMN_20080328_123802 549.949 570.632
+CCTV1_LEGALREPORT_CMN_20080328_123802 638.909 652.586
+CCTV1_LEGALREPORT_CMN_20080328_123802 899.675 905.777
+CCTV1_LEGALREPORT_CMN_20080328_123802 1016.378 1036.368
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 260.556 269.589
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 269.589 280.849
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 1034.323 1042.029
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 1127.101 1140.004
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 1156.362 1176.296
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 356.523 367.797
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 632.675 640.199
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 652.924 664.640
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 998.153 1006.076
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 1367.896 1388.299
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 1606.557 1613.244
+CCTV2_DIALOG_CMN_20080323_220801 1321.827 1341.319
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 391.159 403.915
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 777.048 797.124
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 833.582 855.372
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 1624.624 1649.703
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 1649.703 1668.778
+VOA_STRAITSTALK_CMN_20080414_210500 207.242 215.715
+VOA_STRAITSTALK_CMN_20080414_210500 262.527 266.868
+VOA_STRAITSTALK_CMN_20080414_210500 494.710 503.125
+VOA_STRAITSTALK_CMN_20080414_210500 511.748 523.276
+VOA_STRAITSTALK_CMN_20080414_210500 525.401 533.066
+VOA_STRAITSTALK_CMN_20080414_210500 606.323 613.539
+VOA_STRAITSTALK_CMN_20080414_210500 613.539 625.240
+VOA_STRAITSTALK_CMN_20080414_210500 1078.325 1096.189
+VOA_STRAITSTALK_CMN_20080414_210500 1353.950 1366.936
+VOA_STRAITSTALK_CMN_20080414_210500 1366.936 1380.502
+VOA_STRAITSTALK_CMN_20080414_210500 1380.502 1392.322
+VOA_STRAITSTALK_CMN_20080414_210500 1392.322 1400.851
+VOA_STRAITSTALK_CMN_20080414_210500 1400.851 1418.149
+VOA_STRAITSTALK_CMN_20080414_210500 1418.149 1426.755
+VOA_STRAITSTALK_CMN_20080414_210500 1440.688 1442.814
+VOA_STRAITSTALK_CMN_20080414_210500 1442.814 1463.053
+VOA_STRAITSTALK_CMN_20080414_210500 1475.375 1496.954
+VOA_STRAITSTALK_CMN_20080414_210500 1541.927 1566.969
+VOA_STRAITSTALK_CMN_20080414_210500 1839.847 1841.566
+VOA_STRAITSTALK_CMN_20080414_210500 1841.566 1868.241
+VOA_STRAITSTALK_CMN_20080414_210500 1945.631 1957.812
+VOA_STRAITSTALK_CMN_20080414_210500 2125.735 2146.839
+VOA_STRAITSTALK_CMN_20080414_210500 2732.199 2733.777
+VOA_STRAITSTALK_CMN_20080414_210500 2770.946 2782.647
+VOA_STRAITSTALK_CMN_20080414_210500 2863.110 2885.080
+VOA_STRAITSTALK_CMN_20080414_210500 2903.230 2923.793
+VOA_STRAITSTALK_CMN_20080414_210500 3234.231 3256.558
+VOA_STRAITSTALK_CMN_20080414_210500 3309.376 3315.509
+VOA_STRAITSTALK_CMN_20080414_210500 3315.509 3324.023
+VOA_STRAITSTALK_CMN_20080414_210500 3324.023 3329.514
+VOA_STRAITSTALK_CMN_20080414_210500 3337.754 3351.358
+CCTV2_ECONOMYANDLAW_CMN_20080324_202802 993.159 1004.075
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 100.038 110.008
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 110.008 121.148
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 302.278 311.481
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 317.325 326.842
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 326.842 344.304
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 532.746 540.916
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 804.269 815.848
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 1174.986 1187.501
+CCTV4_ACROSSSTRAIT_CMN_20080328_073002 606.003 612.222
+CCTV4_ACROSSSTRAIT_CMN_20080328_073002 1317.238 1323.004
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 434.957 442.007
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 908.187 931.355
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 1003.670 1027.287
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 1761.105 1763.366
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 1794.368 1819.904
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 2096.000 2111.144
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 2281.960 2299.738
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 2675.919 2678.237
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 2990.851 3018.085
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3075.112 3086.675
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3586.194 3604.157
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3636.939 3650.526
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3675.993 3698.023
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3832.037 3849.222
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4050.684 4086.249
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4121.887 4143.019
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4167.714 4190.852
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4409.823 4413.900
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4454.639 4475.409
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4501.449 4518.812
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4561.903 4601.809
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4670.951 4678.490
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4724.543 4742.142
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 5316.415 5337.715
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 5564.584 5568.411
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 5594.680 5628.696
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6168.428 6193.103
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6219.478 6251.299
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6330.311 6362.012
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6362.012 6379.281
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6420.252 6439.408
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6502.285 6508.326
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6567.228 6589.508
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6589.508 6613.843
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6769.366 6798.919
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6954.252 6956.138
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 7261.811 7277.763
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 7513.530 7529.476
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 7946.686 7963.716
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 7978.291 8008.271
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 8060.732 8063.906
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 8363.042 8387.243
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 580.303 581.475
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 639.793 644.230
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 900.720 908.033
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1089.250 1090.500
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1320.160 1334.416
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1455.962 1460.055
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1528.445 1540.915
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1696.343 1704.232
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1880.324 1893.434
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1893.434 1895.027
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2014.865 2028.896
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2266.555 2284.280
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2296.756 2312.357
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2323.628 2329.175
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2364.044 2365.763
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 934.479 951.520
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1374.975 1398.172
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1436.432 1459.595
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1489.211 1503.057
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1503.057 1511.084
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1511.084 1528.239
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1528.239 1544.999
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1584.729 1606.307
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1730.993 1745.976
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1745.976 1760.489
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 2677.717 2699.815
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 2779.102 2792.660
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 2800.172 2821.770
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 2840.379 2854.828
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 3262.386 3277.658
+CCTV2_DIALOG_CMN_20080330_220803 329.331 337.048
+CCTV2_DIALOG_CMN_20080330_220803 1340.264 1354.585
+CCTV2_DIALOG_CMN_20080330_220803 2037.464 2060.272
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 39.211 55.912
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 154.736 163.311
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 229.580 233.019
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 305.849 316.342
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 334.495 341.214
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 489.437 493.193
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 493.193 498.482
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 546.570 560.759
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 690.803 714.930
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 885.507 914.481
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 914.481 921.876
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 957.556 976.807
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1023.889 1031.400
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1046.247 1055.460
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1462.323 1466.401
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1514.674 1519.347
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1519.347 1534.440
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1569.442 1575.180
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1575.180 1586.358
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1592.419 1601.278
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1603.309 1617.558
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1652.855 1657.838
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1706.099 1715.443
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1832.383 1849.260
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1943.812 1961.752
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1987.723 1997.474
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2192.136 2201.472
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2201.472 2214.638
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2265.606 2282.279
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2341.502 2351.265
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2351.265 2372.341
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2913.550 2930.035
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2913.550 2930.035
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2940.160 2945.847
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2993.306 3016.286
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 3078.481 3085.172
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 3240.077 3248.175
+CCTVNEWS_PEOPLEINNEWS_CMN_20080324_202401 449.873 475.376
+CCTVNEWS_PEOPLEINNEWS_CMN_20080324_202401 475.376 482.915
+CCTVNEWS_PEOPLEINNEWS_CMN_20080324_202401 1795.081 1815.246
+CCTV1_LEGALREPORT_CMN_20080327_123801 212.113 225.544
+CCTV1_LEGALREPORT_CMN_20080327_123801 391.267 397.627
+CCTV1_LEGALREPORT_CMN_20080327_123801 528.989 532.005
+CCTV1_LEGALREPORT_CMN_20080327_123801 633.707 643.174
+CCTV1_LEGALREPORT_CMN_20080327_123801 679.053 691.652
+CCTV1_LEGALREPORT_CMN_20080327_123801 859.370 866.885
+CCTV1_LEGALREPORT_CMN_20080327_123801 866.885 869.120
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 510.751 520.993
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 610.606 618.001
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 656.414 658.449
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 963.038 966.990
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 971.865 980.608
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1123.676 1142.150
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1298.431 1313.459
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1471.075 1483.669
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1588.058 1612.689
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1681.798 1689.103
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1948.055 1957.682
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 871.846 881.393
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1152.716 1162.886
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1211.639 1212.483
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1295.780 1309.767
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1319.256 1326.348
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1326.348 1328.036
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1381.529 1396.124
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1430.419 1444.154
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1461.602 1474.280
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1504.783 1516.135
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1581.733 1593.486
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1597.385 1601.932
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1607.870 1611.136
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1659.150 1668.416
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1731.216 1743.893
+CCTV4_ACROSSSTRAIT_CMN_20080324_073002 324.632 333.882
+CCTV4_ACROSSSTRAIT_CMN_20080324_073002 1295.605 1303.229
+CCTV4_ACROSSSTRAIT_CMN_20080324_073002 1493.629 1499.050
+CCTV4_ACROSSSTRAIT_CMN_20080323_073002 339.387 352.600
+CCTV4_ACROSSSTRAIT_CMN_20080323_073002 675.952 683.920
+CCTV4_ACROSSSTRAIT_CMN_20080323_073002 1354.442 1365.441
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 358.704 388.240
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 689.092 709.688
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 883.248 891.546
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 928.874 939.373
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 980.662 995.710
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1154.951 1168.191
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1250.547 1255.864
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1275.017 1296.901
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1446.622 1462.387
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1465.590 1479.606
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1562.994 1578.157
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1578.157 1593.026
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1665.124 1685.334
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1775.037 1791.295
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1869.655 1888.414
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1930.774 1941.211
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1966.507 1988.760
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1988.760 1998.510
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2062.122 2075.225
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2364.435 2401.100
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2650.012 2659.744
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2695.642 2709.517
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2845.166 2859.008
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2845.166 2859.008
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3000.085 3017.908
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3129.338 3139.494
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3150.374 3157.250
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3207.737 3222.463
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3207.737 3222.463
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3283.250 3286.850
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3785.698 3805.119
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3805.119 3807.572
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3826.777 3842.763
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3855.673 3868.749
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3868.749 3881.129
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3911.574 3929.486
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3911.574 3929.486
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3929.486 3937.799
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4000.645 4017.610
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4000.645 4017.610
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4115.458 4121.444
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4126.843 4129.546
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4148.038 4163.709
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4317.698 4332.466
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4338.082 4356.603
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4363.355 4375.331
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4718.604 4727.144
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4756.968 4766.803
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4828.812 4841.125
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5086.733 5092.952
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5204.183 5241.880
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5271.774 5276.930
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5331.146 5345.381
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5385.226 5396.328
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5513.497 5521.697
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5634.856 5653.647
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5720.917 5730.558
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5736.001 5745.114
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5825.338 5829.379
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5901.056 5906.089
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5989.401 5996.148
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6050.515 6059.616
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6082.801 6090.678
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6217.539 6233.863
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6233.863 6238.812
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6238.812 6244.592
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6244.592 6254.992
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6254.992 6261.593
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6261.593 6267.408
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6267.408 6274.281
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6274.281 6283.473
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6283.473 6290.991
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6290.991 6302.665
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6302.665 6307.238
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6338.240 6350.432
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6473.181 6478.790
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7041.786 7048.548
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7129.548 7147.933
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7343.837 7351.456
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7623.514 7639.319
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7663.208 7665.166
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7705.454 7712.604
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7733.204 7739.282
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7739.282 7749.278
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7749.278 7758.568
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7758.568 7764.571
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7767.249 7785.326
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7810.096 7822.536
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 8180.257 8204.346
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 8222.251 8240.932
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 8308.543 8314.292
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 461.577 463.280
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 496.855 501.899
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 734.845 740.452
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 740.452 752.619
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 1541.085 1545.725
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 1561.275 1569.775
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 1652.401 1658.635
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 148.919 162.499
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 179.492 185.534
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 185.534 192.136
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 192.136 203.015
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 226.707 235.340
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 235.340 241.668
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 268.434 273.787
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 325.047 335.829
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 462.554 475.640
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 485.007 492.730
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 542.532 568.781
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 576.362 589.995
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 840.064 851.230
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 851.230 857.978
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 857.978 864.943
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 870.879 881.875
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 965.317 977.338
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1004.188 1015.549
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1331.932 1341.729
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1389.462 1395.305
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1414.874 1420.906
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1450.275 1457.837
+CCTV1_LEGALREPORT_CMN_20080422_123801 422.064 425.148
+CCTV1_LEGALREPORT_CMN_20080422_123801 882.276 906.163
+CCTV1_LEGALREPORT_CMN_20080422_123801 1048.379 1069.002
+CCTV1_LEGALREPORT_CMN_20080422_123801 1078.158 1098.167
+CCTV2_DIALOG_CMN_20080309_222803 523.446 544.410
+CCTV2_DIALOG_CMN_20080309_222803 544.410 563.206
+CCTV2_DIALOG_CMN_20080309_222803 584.423 601.345
+CCTV2_DIALOG_CMN_20080309_222803 888.358 902.456
+CCTV2_DIALOG_CMN_20080309_222803 1379.715 1392.733
+CCTV2_DIALOG_CMN_20080309_222803 1443.541 1476.184
+CCTV2_DIALOG_CMN_20080309_222803 1679.190 1712.581
+CCTV2_DIALOG_CMN_20080309_222803 1992.685 2009.277
+CCTV2_DIALOG_CMN_20080309_222803 2464.131 2483.586
+CCTV2_DIALOG_CMN_20080309_222803 2524.525 2558.650
+CCTV2_DIALOG_CMN_20080309_222803 2588.577 2608.491
+CCTV2_DIALOG_CMN_20080309_222803 2836.204 2852.105
+CCTV2_DIALOG_CMN_20080309_222803 2852.105 2881.348
+CCTV2_DIALOG_CMN_20080309_222803 2938.325 2975.540
+CCTV2_DIALOG_CMN_20080309_222803 2975.540 2983.827
+CCTV2_DIALOG_CMN_20080309_222803 3051.037 3071.396
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 657.445 661.414
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 1041.427 1049.925
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 1069.942 1075.973
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 1716.267 1726.892
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 1766.782 1773.907
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 212.260 243.932
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 547.222 565.187
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 641.691 645.517
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 659.667 674.024
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 1838.611 1857.279
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 1879.746 1883.782
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 1960.658 1980.371
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2363.964 2389.355
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2389.355 2412.008
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2467.097 2481.788
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2547.292 2571.964
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2623.927 2639.370
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2659.574 2685.916
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 5414.746 5441.790
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 8200.062 8218.732
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 8287.655 8305.972
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 8498.341 8512.467
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 8713.764 8728.212
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9167.094 9183.286
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9183.286 9199.448
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9199.448 9214.358
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9433.436 9454.735
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9481.236 9496.020
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9496.020 9498.619
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9535.504 9557.075
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9603.183 9612.738
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9665.241 9680.894
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9836.623 9865.334
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9927.451 9959.428
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10325.673 10343.062
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10325.673 10343.062
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10509.403 10524.718
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10558.347 10593.337
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10622.808 10642.622
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10622.808 10642.622
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10642.622 10674.657
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10968.320 10998.969
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 11360.345 11393.217
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 11411.325 11440.152
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 11448.280 11474.076
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 11690.929 11705.594
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 12129.332 12147.527
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 12169.093 12201.839
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 330.583 337.026
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 345.789 353.633
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 353.633 371.526
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 390.851 411.783
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 485.641 498.839
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 498.839 524.213
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 528.095 545.345
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 528.095 545.345
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 812.006 831.431
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 847.094 854.429
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1096.875 1105.438
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1096.875 1105.438
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1134.545 1147.533
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1450.162 1460.877
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1660.093 1684.357
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1877.724 1887.478
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 2248.026 2254.432
+CCTV1_LEGALREPORT_CMN_20080324_123601 365.563 372.204
+CCTV1_LEGALREPORT_CMN_20080324_123601 1054.872 1061.662
+CCTV1_LEGALREPORT_CMN_20080324_123601 1294.882 1323.696
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002 267.346 292.051
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002 340.648 353.161
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002 964.927 973.614
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002 1527.876 1543.970
+HUBEI_COMMUNICATE_CMN_20080330_230009 106.754 127.639
+HUBEI_COMMUNICATE_CMN_20080330_230009 510.440 526.802
+HUBEI_COMMUNICATE_CMN_20080330_230009 546.030 569.138
+HUBEI_COMMUNICATE_CMN_20080330_230009 598.702 622.698
+HUBEI_COMMUNICATE_CMN_20080330_230009 918.941 939.460
+HUBEI_COMMUNICATE_CMN_20080330_230009 1028.755 1046.414
+HUBEI_COMMUNICATE_CMN_20080330_230009 1046.414 1058.953
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 620.246 639.075
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 639.075 653.275
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 682.899 689.031
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 689.031 714.166
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 747.337 760.989
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1036.875 1053.519
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1429.272 1462.301
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1563.238 1570.890
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1613.387 1634.821
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1655.430 1683.376
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 176.872 189.801
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 296.455 302.169
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 391.798 412.470
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 577.881 589.846
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1035.900 1045.744
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1045.744 1050.229
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1088.227 1097.618
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1137.329 1147.414
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1467.699 1490.511
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802 150.841 163.027
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802 184.162 198.770
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802 438.806 449.046
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802 740.471 758.222
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1085.429 1102.523
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1250.244 1257.399
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1525.719 1532.060
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1562.654 1573.454
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1587.438 1602.251
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2011.591 2014.060
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2131.905 2134.608
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2166.622 2170.341
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2279.746 2298.318
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2362.475 2367.569
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 119.934 127.263
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 140.561 144.645
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 189.389 192.646
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 375.679 382.461
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 440.982 452.507
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 665.216 678.739
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 852.910 865.480
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 877.416 892.030
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 892.030 903.796
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1335.685 1355.715
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1450.033 1458.253
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1588.432 1606.172
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1606.172 1609.838
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1609.838 1616.153
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1798.932 1808.603
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2089.485 2093.061
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2093.061 2109.322
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2194.211 2206.002
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2521.753 2525.707
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2674.648 2684.234
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3169.278 3177.745
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3169.278 3177.745
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3199.498 3205.959
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3219.352 3233.277
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3240.981 3248.680
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3269.990 3278.023
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3278.023 3291.211
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3294.320 3298.587
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3445.988 3454.317
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3574.777 3583.610
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3646.704 3662.393
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3662.393 3667.501
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3662.393 3667.501
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3667.501 3673.861
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3686.654 3696.441
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3705.340 3710.605
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3719.716 3722.979
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3719.716 3722.979
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3754.156 3757.702
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3757.702 3763.549
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3757.702 3763.549
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3789.032 3807.102
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3807.102 3817.533
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3959.937 3964.015
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4092.714 4100.559
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4102.061 4105.295
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4144.488 4153.911
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4202.960 4213.402
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4202.960 4213.402
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4263.955 4274.142
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4284.767 4300.948
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4309.355 4319.858
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4497.661 4512.302
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4534.477 4544.984
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4582.461 4596.896
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4610.165 4615.947
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4676.278 4688.855
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4696.355 4703.995
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4703.995 4713.315
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4713.315 4731.835
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4731.835 4738.225
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4738.225 4745.872
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4745.872 4754.991
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4761.834 4772.444
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4772.444 4776.303
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4776.303 4787.538
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4796.191 4807.547
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4819.579 4831.924
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4882.167 4904.612
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4913.109 4936.477
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4985.323 4990.719
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4990.719 4996.335
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5112.626 5128.524
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5618.208 5626.100
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5705.375 5728.384
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5784.708 5795.738
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5795.738 5810.401
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5810.401 5823.610
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5845.642 5855.019
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5917.685 5922.733
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5970.424 5974.713
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6103.472 6121.632
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6121.632 6147.005
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6239.148 6246.958
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6431.822 6452.078
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6646.314 6650.785
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6646.314 6650.785
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6667.789 6692.398
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6701.907 6718.850
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6726.942 6736.590
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6736.590 6747.732
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6762.223 6779.183
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6796.095 6806.688
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6981.422 6988.998
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7201.297 7233.808
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7267.161 7277.896
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7284.124 7295.476
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7309.305 7326.401
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7344.303 7358.798
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7378.571 7386.441
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 282.541 311.145
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 449.138 459.997
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 494.593 514.625
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 707.915 731.377
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 1513.257 1525.570
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 1856.251 1857.173
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 1866.059 1868.634
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 2033.643 2036.409
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 2236.470 2255.264
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 3101.208 3115.669
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 3422.377 3441.161
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 3474.049 3495.959
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 3495.959 3512.231
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 4562.052 4569.830
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 4749.950 4760.827
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 6085.863 6097.073
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 8801.231 8807.016
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 9645.243 9650.423
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 10556.680 10572.742
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 10620.072 10633.936
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 10633.936 10648.341
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 271.884 278.118
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 300.854 307.198
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 458.963 464.792
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 510.711 521.134
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 597.853 606.103
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 636.506 642.631
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 1076.912 1096.786
+CCTV2_DIALOG_CMN_20080413_220801 394.231 427.503
+CCTV2_DIALOG_CMN_20080413_220801 526.611 550.643
+CCTV2_DIALOG_CMN_20080413_220801 634.145 636.385
+CCTV2_DIALOG_CMN_20080413_220801 726.432 743.871
+CCTV2_DIALOG_CMN_20080413_220801 1596.005 1612.419
+CCTV2_DIALOG_CMN_20080413_220801 1856.240 1863.515
+CCTV2_DIALOG_CMN_20080413_220801 1863.515 1873.066
+CCTV2_DIALOG_CMN_20080413_220801 1876.354 1878.474
+CCTV2_DIALOG_CMN_20080413_220801 1892.994 1901.345
+CCTV2_DIALOG_CMN_20080413_220801 1892.994 1901.345
+CCTV2_DIALOG_CMN_20080413_220801 1943.532 1949.267
+CCTV2_DIALOG_CMN_20080413_220801 1985.331 1989.398
+CCTV2_DIALOG_CMN_20080413_220801 1995.382 2014.866
+CCTV2_DIALOG_CMN_20080413_220801 2102.298 2108.204
+CCTV2_DIALOG_CMN_20080413_220801 2314.775 2332.179
+CCTV2_DIALOG_CMN_20080413_220801 2999.507 3009.181
+CCTV2_DIALOG_CMN_20080413_220801 3030.684 3044.239
+CCTV2_DIALOG_CMN_20080413_220801 3030.684 3044.239
+CCTV2_DIALOG_CMN_20080413_220801 3054.797 3061.767
+CCTV2_DIALOG_CMN_20080413_220801 3054.797 3061.767
+CCTV2_DIALOG_CMN_20080413_220801 3084.319 3100.712
+CCTV2_DIALOG_CMN_20080413_220801 3100.712 3114.330
+CCTV2_DIALOG_CMN_20080413_220801 3100.712 3114.330
+CCTV1_LEGALREPORT_CMN_20080312_123601 898.991 907.458
+CCTV1_LEGALREPORT_CMN_20080312_123601 1273.836 1282.213
+CCTV1_LEGALREPORT_CMN_20080312_123601 1348.560 1360.570
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 734.245 753.784
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1144.712 1160.126
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1184.559 1186.684
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1461.929 1484.215
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1756.860 1770.844
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1770.844 1785.969
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1785.969 1805.792
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1830.878 1843.282
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 2028.871 2051.359
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 2446.226 2461.879
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 2566.118 2582.370
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 3156.066 3164.719
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 319.367 328.179
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 420.445 444.684
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 475.750 489.683
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 489.683 511.155
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 520.999 536.855
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 714.464 722.912
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 843.133 869.528
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 869.528 896.901
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 946.012 962.184
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1055.517 1071.673
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1107.259 1130.214
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1186.197 1210.293
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1219.489 1227.890
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1227.890 1233.531
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1320.483 1323.619
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1344.737 1361.056
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1467.794 1470.281
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1693.338 1708.062
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1711.968 1740.837
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1921.061 1947.367
+CCTV4_ACROSSSTRAIT_CMN_20080425_073002 424.066 438.812
+CCTV4_ACROSSSTRAIT_CMN_20080425_073002 1308.407 1332.950
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 721.538 731.767
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 731.767 736.767
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1411.444 1419.835
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1860.402 1877.555
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1884.208 1884.849
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1981.351 1998.883
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1998.883 2016.543
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2130.535 2143.553
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2300.049 2323.432
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2397.635 2410.859
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2441.316 2459.076
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2702.466 2735.265
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2954.623 2980.818
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 3173.815 3194.406
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 372.148 391.957
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 458.578 459.831
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 549.374 564.519
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 573.314 581.489
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 709.526 711.619
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 709.526 711.619
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 800.846 807.307
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 807.307 819.933
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 819.933 826.527
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 836.339 841.324
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 854.105 863.283
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 863.283 878.428
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 878.428 883.431
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 896.017 898.600
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 909.740 916.519
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 983.479 985.273
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1368.295 1378.492
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1378.492 1394.545
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1401.809 1410.152
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1466.307 1481.028
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1637.819 1654.100
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 118.759 123.463
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 461.086 465.770
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 505.139 518.550
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 603.583 622.194
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 661.499 677.136
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 717.974 724.350
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1296.998 1302.202
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1402.782 1412.267
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1438.642 1458.299
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1871.944 1879.909
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1871.944 1879.909
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1929.924 1944.124
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1954.888 1968.183
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1968.183 1979.980
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2078.712 2085.517
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2121.449 2141.315
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2141.315 2146.521
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2184.152 2194.384
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2243.583 2257.505
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2243.583 2257.505
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2307.717 2318.201
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2318.201 2333.791
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2603.971 2615.633
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3010.665 3025.781
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3034.317 3040.625
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3112.937 3121.219
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3159.185 3180.651
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3180.651 3198.315
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3251.429 3256.671
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3366.509 3385.014
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3366.509 3385.014
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3437.281 3451.077
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3590.330 3612.073
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3717.095 3725.798
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3869.792 3877.934
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3977.480 4003.700
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4136.474 4150.118
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4223.030 4245.253
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4586.006 4606.640
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4642.949 4658.521
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4873.998 4884.052
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4873.998 4884.052
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4884.052 4893.666
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5078.411 5080.646
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5250.414 5270.384
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5324.836 5343.723
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5690.857 5701.040
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5775.280 5791.706
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5831.776 5837.448
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5878.440 5888.998
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5908.640 5922.303
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5994.440 6012.444
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6033.989 6038.208
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6038.208 6046.152
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6463.797 6475.924
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6475.924 6481.079
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6536.481 6545.904
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6624.408 6633.608
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6633.608 6645.438
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6809.859 6821.217
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6888.586 6898.022
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7173.669 7184.547
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7419.657 7434.242
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7448.758 7470.281
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7448.758 7470.281
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7470.281 7484.549
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7517.118 7525.783
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7561.369 7592.201
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7739.110 7755.344
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7904.540 7910.273
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7904.540 7910.273
+CCTV1_LEGALREPORT_CMN_20080407_123801 503.360 537.155
+CCTV1_LEGALREPORT_CMN_20080407_123801 626.944 641.090
+VOA_FOCUSDIALOGUE_CMN_20080414_160500 236.828 249.206
+VOA_FOCUSDIALOGUE_CMN_20080414_160500 270.339 279.214
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 497.209 502.787
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1237.886 1248.959
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1260.299 1265.690
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1437.291 1448.510
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1448.510 1457.197
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1599.872 1606.003
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 162.148 166.196
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 448.073 449.292
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 642.519 644.832
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1225.349 1233.350
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1284.343 1289.686
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1459.978 1485.741
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1662.847 1665.911
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1962.247 1974.435
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 2322.207 2323.712
+CCTV4_ACROSSSTRAIT_CMN_20080426_073002 191.190 198.363
+CCTV4_ACROSSSTRAIT_CMN_20080426_073002 1147.443 1160.662
+CCTV1_LEGALREPORT_CMN_20080410_123801 307.221 316.518
+CCTV1_LEGALREPORT_CMN_20080410_123801 327.548 329.438
+CCTV1_LEGALREPORT_CMN_20080410_123801 1055.519 1060.114
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 123.220 128.033
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 268.970 282.579
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 1121.300 1128.300
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 1152.252 1153.737
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 1235.841 1246.200
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 1392.284 1396.143
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 2285.846 2294.557
+CCTV1_LEGALREPORT_CMN_20080319_123601 633.814 646.627
+CCTV1_LEGALREPORT_CMN_20080319_123601 723.763 731.218
+CCTV1_LEGALREPORT_CMN_20080319_123601 833.512 843.482
+CCTV1_LEGALREPORT_CMN_20080319_123601 921.703 954.679
+CCTV1_LEGALREPORT_CMN_20080319_123601 1020.289 1035.552
+CCTV2_ECONOMYANDLAW_CMN_20080322_202802 392.841 404.325
+CCTV2_ECONOMYANDLAW_CMN_20080322_202802 1001.460 1014.883
+CCTV1_LEGALREPORT_CMN_20080323_123601 314.957 321.963
+CCTV1_LEGALREPORT_CMN_20080323_123601 407.000 414.969
+CCTV1_LEGALREPORT_CMN_20080323_123601 430.047 439.765
+CCTV1_LEGALREPORT_CMN_20080323_123601 527.075 533.701
+CCTV1_LEGALREPORT_CMN_20080323_123601 1057.525 1059.771
+CCTV1_LEGALREPORT_CMN_20080323_123601 1123.912 1135.762
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 136.367 143.410
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 287.991 310.200
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 616.468 632.029
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 958.975 971.798
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 1148.657 1159.251
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 391.785 397.550
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 457.656 467.209
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 562.053 583.450
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 599.589 601.086
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 744.461 747.874
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 1319.993 1329.117
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 2490.714 2501.539
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 2524.039 2544.126
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 2602.265 2619.834
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 2914.896 2934.487
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 3010.481 3014.903
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 3520.733 3542.487
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 3932.922 3933.650
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4130.133 4161.057
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4321.267 4345.098
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4753.824 4774.614
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4794.758 4796.159
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4915.194 4919.101
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 5587.197 5638.012
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 6091.453 6101.226
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 7208.003 7235.345
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 7235.345 7272.083
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 7981.705 8010.668
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 9184.985 9208.172
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 10021.625 10022.718
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 10442.648 10467.546
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 10486.012 10501.515
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 10501.515 10531.964
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 11579.117 11583.296
+CCTV1_LEGALREPORT_CMN_20080408_123801 346.889 354.124
+CCTV1_LEGALREPORT_CMN_20080408_123801 444.523 466.588
+CCTV1_LEGALREPORT_CMN_20080408_123801 466.588 474.707
+CCTV1_LEGALREPORT_CMN_20080408_123801 487.629 495.058
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 1245.650 1250.712
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 1659.799 1667.285
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 1702.301 1703.911
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 2349.141 2355.708
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 2349.141 2355.708
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 2355.708 2359.271
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 299.907 317.860
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 405.438 413.282
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 481.287 487.840
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 621.940 637.105
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 816.038 818.179
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 1013.115 1018.171
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 1525.335 1529.163
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 1840.424 1844.314
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 2059.493 2063.040
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 2170.942 2181.869
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 2405.366 2415.834
+BEIJING_TWOWAYLANES_CMN_20080412_130002 176.566 177.754
+BEIJING_TWOWAYLANES_CMN_20080412_130002 177.754 178.988
+BEIJING_TWOWAYLANES_CMN_20080412_130002 191.952 192.843
+BEIJING_TWOWAYLANES_CMN_20080412_130002 200.952 203.718
+BEIJING_TWOWAYLANES_CMN_20080412_130002 487.921 492.405
+BEIJING_TWOWAYLANES_CMN_20080412_130002 797.239 813.498
+BEIJING_TWOWAYLANES_CMN_20080412_130002 813.498 826.230
+BEIJING_TWOWAYLANES_CMN_20080412_130002 975.014 981.538
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1353.149 1358.056
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1407.301 1413.293
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1453.499 1458.273
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1477.649 1480.617
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1491.351 1493.288
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1566.656 1577.059
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1578.137 1581.246
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1672.954 1676.313
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 198.267 222.928
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 300.901 318.742
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 512.973 522.999
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 635.639 644.839
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 684.635 699.428
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 684.635 699.428
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 899.453 920.398
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 953.360 969.367
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 953.360 969.367
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1311.152 1322.329
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1453.792 1469.543
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1469.543 1478.247
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1488.358 1518.195
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1518.195 1539.297
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1539.297 1555.666
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 2162.997 2178.590
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 2785.563 2792.383
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 3189.655 3209.026
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 3314.451 3329.888
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 3872.413 3891.838
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 3905.932 3923.261
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 4000.534 4022.840
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 4117.817 4135.570
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 4433.878 4447.704
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 4691.977 4711.600
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 7486.080 7502.900
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8213.955 8225.734
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8407.659 8428.642
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8450.744 8479.864
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8712.892 8737.796
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8814.301 8835.930
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8895.465 8913.788
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 9608.681 9625.125
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 10021.458 10033.824
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 10356.773 10381.454
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 10521.366 10536.851
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 10850.162 10872.187
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12390.477 12407.842
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12661.400 12673.940
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12661.400 12673.940
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12919.282 12931.671
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12994.184 13017.972
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 13186.526 13198.159
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 13239.402 13260.472
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 13329.121 13344.916
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 13887.314 13902.082
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 14018.529 14033.380
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 840.708 853.880
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 1920.531 1936.749
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 2408.951 2412.795
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 2503.551 2508.895
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 2515.551 2522.164
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 193.534 200.284
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 764.696 770.243
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 1071.841 1082.060
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 1177.841 1183.888
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 1205.278 1209.309
+CCTV1_LEGALREPORT_CMN_20080321_123602 426.139 433.281
+CCTV2_ECONOMYANDLAW_CMN_20080401_202802 208.545 212.374
+CCTV2_ECONOMYANDLAW_CMN_20080401_202802 936.349 941.557
+CCTV2_ECONOMYANDLAW_CMN_20080401_202802 1367.649 1378.079
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 346.199 362.290
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 362.290 368.774
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 410.105 416.876
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 467.490 472.031
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 467.490 472.031
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 498.115 507.232
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 595.858 611.176
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 611.176 616.529
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 611.176 616.529
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 681.799 695.546
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 766.201 778.375
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 793.250 798.031
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 938.170 944.266
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 944.266 964.753
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 973.644 993.565
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1003.970 1037.143
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1204.394 1215.280
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1204.394 1215.280
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1249.087 1267.718
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1352.325 1371.420
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1505.167 1509.967
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1570.502 1580.729
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1613.045 1623.846
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1942.809 1951.246
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2036.397 2050.135
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2614.535 2626.425
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2657.608 2674.175
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2808.029 2811.566
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2811.566 2816.597
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2840.395 2851.054
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2871.330 2889.000
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2896.157 2899.569
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2958.738 2974.705
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3002.655 3015.083
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3081.621 3086.228
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3112.140 3120.602
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3112.140 3120.602
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3177.154 3186.591
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3252.260 3260.692
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3322.053 3331.395
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3351.192 3357.614
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3374.632 3379.866
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3502.375 3505.896
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3531.533 3546.092
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3549.691 3572.817
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3748.575 3754.234
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3853.778 3872.724
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3901.051 3909.682
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4187.987 4190.300
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4190.300 4196.924
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4295.235 4302.294
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4529.393 4539.554
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4652.269 4669.863
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4788.794 4799.310
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4827.815 4843.164
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4851.293 4858.996
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4858.996 4880.929
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4893.102 4909.773
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4893.102 4909.773
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5187.221 5196.533
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5376.896 5388.964
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5554.819 5572.218
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5572.218 5581.719
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5581.719 5588.858
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5591.061 5603.518
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5603.518 5615.292
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5615.292 5633.867
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5633.867 5637.258
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5637.258 5641.009
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5657.379 5666.880
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5666.880 5670.755
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5670.755 5679.520
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5686.114 5691.051
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5723.021 5738.273
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5738.273 5759.396
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5787.842 5794.232
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5799.576 5821.514
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5834.453 5846.734
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5863.462 5886.898
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5925.428 5941.566
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5941.566 5959.503
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6178.666 6188.558
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6188.558 6195.766
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6229.538 6233.881
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6229.538 6233.881
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6245.265 6251.014
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6251.014 6258.689
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6433.140 6442.235
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6433.140 6442.235
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6470.937 6475.374
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6499.905 6524.048
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6557.085 6563.600
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6596.900 6605.796
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6657.418 6678.422
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6994.200 6999.833
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7043.181 7057.846
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7062.810 7068.050
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7085.414 7095.427
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7106.832 7113.657
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7117.970 7124.079
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7134.908 7136.612
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7136.612 7137.816
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7137.816 7140.448
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7152.999 7162.414
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7162.414 7167.460
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7237.708 7243.710
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7329.513 7341.308
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7393.610 7407.984
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7437.349 7458.191
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7575.564 7583.218
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7692.318 7697.834
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7889.134 7893.124
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8292.319 8297.244
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8292.319 8297.244
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8420.971 8422.803
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8508.250 8514.934
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8531.939 8544.436
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8575.956 8581.701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8581.701 8584.663
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8618.482 8630.718
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8634.844 8641.156
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8641.156 8646.321
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8647.141 8651.125
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8647.141 8651.125
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8651.125 8656.237
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8651.125 8656.237
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9233.874 9248.691
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9366.521 9375.377
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9375.377 9380.831
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9380.831 9389.770
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9444.440 9449.190
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9479.117 9481.336
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10056.245 10064.059
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10158.726 10166.135
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10176.842 10181.873
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10176.842 10181.873
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10264.827 10286.662
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10311.306 10328.322
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10358.390 10375.175
+CCTV2_DIALOG_CMN_20080316_214834 1019.619 1025.932
+CCTV2_DIALOG_CMN_20080316_214834 1098.676 1106.317
+CCTV2_DIALOG_CMN_20080316_214834 1428.152 1438.024
+CCTV2_DIALOG_CMN_20080316_214834 1462.452 1470.692
+CCTV2_DIALOG_CMN_20080316_214834 1818.570 1826.008
+CCTV2_DIALOG_CMN_20080316_214834 2145.158 2151.986
+CCTV1_LEGALREPORT_CMN_20080311_123601 519.532 524.157
+CCTV1_LEGALREPORT_CMN_20080311_123601 912.474 927.498
+CCTV1_LEGALREPORT_CMN_20080311_123601 1086.885 1098.946
+CCTV2_ACROSSSTRAIT_CMN_20080312_073000 236.970 244.359
+VOA_STRAITSTALK_CMN_20080407_210500 78.197 84.964
+VOA_STRAITSTALK_CMN_20080407_210500 380.380 407.182
+VOA_STRAITSTALK_CMN_20080407_210500 737.740 740.843
+VOA_STRAITSTALK_CMN_20080407_210500 795.439 806.815
+VOA_STRAITSTALK_CMN_20080407_210500 948.524 964.064
+VOA_STRAITSTALK_CMN_20080407_210500 1501.159 1526.216
+VOA_STRAITSTALK_CMN_20080407_210500 1597.801 1611.951
+VOA_STRAITSTALK_CMN_20080407_210500 1727.467 1743.423
+VOA_STRAITSTALK_CMN_20080407_210500 1805.738 1806.520
+VOA_STRAITSTALK_CMN_20080407_210500 1944.355 1963.072
+VOA_STRAITSTALK_CMN_20080407_210500 1995.601 2003.988
+VOA_STRAITSTALK_CMN_20080407_210500 2051.287 2055.501
+VOA_STRAITSTALK_CMN_20080407_210500 2061.349 2081.484
+VOA_STRAITSTALK_CMN_20080407_210500 2139.432 2151.020
+VOA_STRAITSTALK_CMN_20080407_210500 2209.583 2223.521
+VOA_STRAITSTALK_CMN_20080407_210500 2366.359 2367.760
+VOA_STRAITSTALK_CMN_20080407_210500 2957.323 2965.293
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 476.351 484.400
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 635.787 643.616
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 635.787 643.616
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 672.116 684.215
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 780.285 781.942
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 882.632 886.312
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 2127.338 2131.336
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 2304.524 2316.837
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 2469.692 2483.067
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 2546.683 2562.826
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 3092.559 3117.522
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 3238.544 3265.067
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 3756.116 3779.756
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 757.576 764.718
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 951.598 977.243
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 1262.048 1269.805
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 1297.397 1314.045
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 1560.342 1599.552
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 1560.342 1599.552
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 104.551 107.067
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 261.838 264.026
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 395.582 401.847
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 592.967 596.107
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 641.144 645.395
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 668.132 676.303
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 877.132 878.975
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 1223.588 1232.050
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 1347.089 1352.709
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 1890.190 1906.136
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 1890.190 1906.136
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 2055.559 2060.655
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 2275.375 2279.172
+CCTV4_ACROSSSTRAIT_CMN_20080322_073002 227.639 242.544
+CCTV4_ACROSSSTRAIT_CMN_20080322_073002 776.373 782.966
+CCTV4_ACROSSSTRAIT_CMN_20080322_073002 1015.042 1021.543
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 573.428 595.324
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 635.144 655.937
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 701.713 720.737
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 742.143 754.648
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 816.851 838.195
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 889.613 903.057
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 940.372 960.383
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1181.564 1196.749
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1196.749 1232.755
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1278.786 1300.476
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1325.810 1343.728
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1391.018 1404.904
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1577.436 1597.010
diff --git a/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25 b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25
new file mode 100644
index 00000000000..79cf4135556
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25
@@ -0,0 +1,40 @@
+CCTV1_30MINNEWS_CMN_20080328_115902
+CCTV1_30MINNEWS_CMN_20080329_115901
+CCTV1_30MINNEWS_CMN_20080331_115901
+CCTV1_30MINNEWS_CMN_20080401_115901
+CCTV1_30MINNEWS_CMN_20080407_115901
+CCTV1_30MINNEWS_CMN_20080412_115901
+CCTV2_ECON30MIN_CMN_20080406_213518
+CCTV2_ECON30MIN_CMN_20080410_213502
+CCTV2_ECON30MIN_CMN_20080411_213502
+CCTV2_ECON30MIN_CMN_20080412_213501
+CCTV2_ECON30MIN_CMN_20080413_213502
+CCTV2_ECON30MIN_CMN_20080420_213502
+CCTV2_ECON30MIN_CMN_20080423_213501
+CCTV2_ECON30MIN_CMN_20080425_213502
+CCTV2_ECON30MIN_CMN_20080426_213501
+CCTV2_ECON30MIN_CMN_20080429_213502
+CCTV2_NEWSLIST_CMN_20080407_114902
+CCTV2_NEWSLIST_CMN_20080415_114902
+CCTV2_NEWSLIST_CMN_20080416_114902
+CCTV7_MILITARYNEWS1_CMN_20080325_100502
+CCTV7_MILITARYNEWS1_CMN_20080327_100812
+CCTV7_MILITARYNEWS1_CMN_20080330_100520
+CCTV7_MILITARYNEWS1_CMN_20080407_100502
+CCTV7_MILITARYNEWS1_CMN_20080416_100502
+CCTV7_MILITARYNEWS1_CMN_20080420_100515
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000
+VOA_INTNLNEWS_CMN_20080402_210000
+VOA_INTNLNEWS_CMN_20080405_210000
+VOA_INTNLNEWS_CMN_20080407_210000
+VOA_INTNLNEWS_CMN_20080410_210000
+VOA_INTNLNEWS_CMN_20080412_210000
+VOA_INTNLNEWS_CMN_20080414_210000
diff --git a/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25.segment b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25.segment
new file mode 100644
index 00000000000..0fc4387b9c1
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25.segment
@@ -0,0 +1,4026 @@
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 21.463 32.355
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 32.355 39.415
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 45.494 54.198
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 54.198 57.120
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 57.120 58.964
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 58.964 70.684
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 70.684 81.638
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 81.638 101.782
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 138.357 146.560
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 146.560 156.884
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 156.884 164.757
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 171.188 182.903
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 182.903 198.838
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 198.838 205.748
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 205.748 232.942
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 232.942 246.753
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 246.753 259.112
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 259.112 264.815
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 304.346 317.839
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 317.839 339.164
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 357.075 367.565
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 367.565 380.972
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 392.074 401.948
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 401.948 412.906
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 433.324 445.133
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 455.727 476.728
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 496.285 504.535
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 538.718 548.786
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 548.786 556.808
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 556.808 565.542
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 565.542 575.950
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 575.950 584.168
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 584.168 589.641
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 589.641 598.497
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 598.497 624.088
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 624.088 631.864
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 631.864 666.366
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 666.366 678.567
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 714.149 732.313
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 732.313 745.214
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 745.214 750.238
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 770.222 786.636
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 818.556 841.036
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 841.036 863.175
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 863.175 865.624
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 934.986 946.288
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 946.288 948.905
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 967.731 976.654
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1012.924 1033.778
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1053.328 1055.388
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1082.532 1092.085
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1105.869 1114.512
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1114.512 1119.941
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1143.248 1161.570
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1161.570 1174.396
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1174.396 1180.159
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1198.649 1205.819
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1212.239 1225.726
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1225.726 1238.265
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1246.695 1259.250
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1270.621 1279.500
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1290.220 1302.389
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1302.389 1306.289
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1374.030 1386.299
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1386.299 1389.580
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1389.580 1390.455
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 67.359 70.217
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 70.217 72.249
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 72.249 77.811
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 77.811 85.933
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 85.933 94.636
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 94.636 98.823
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 103.583 105.981
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 124.906 134.814
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 134.814 141.503
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 141.503 156.314
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 156.314 165.092
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 165.092 170.883
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 173.616 176.600
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 176.600 181.951
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 181.951 190.809
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 190.809 199.713
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 199.713 205.354
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 205.354 212.139
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 212.139 219.841
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 219.841 227.718
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 227.718 234.781
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 238.517 252.115
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 252.115 261.844
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 274.926 287.084
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 287.084 310.010
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 310.010 326.862
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 326.862 335.741
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 335.741 343.800
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 343.800 348.613
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 348.613 362.942
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 362.942 368.480
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 368.480 379.481
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 379.481 388.292
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 388.292 393.665
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 393.665 400.261
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 400.261 407.416
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 416.077 421.255
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 421.255 431.592
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 431.592 437.936
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 437.936 449.077
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 449.077 457.855
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 457.855 470.923
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 479.159 484.972
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 484.972 490.426
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 498.469 510.872
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 510.872 519.337
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 528.538 540.782
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 540.782 549.100
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 549.100 558.216
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 558.216 570.439
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 570.439 574.470
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 574.470 578.861
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 578.861 582.711
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 582.711 593.428
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 602.694 612.956
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 612.956 617.537
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 617.537 625.309
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 625.309 630.855
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 630.855 641.811
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 660.532 667.639
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 667.639 679.810
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 679.810 685.016
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 685.016 687.610
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 687.610 693.415
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 702.979 710.241
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 723.754 729.739
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 729.739 733.523
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 733.523 737.475
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 737.475 743.742
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 751.406 763.986
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 763.986 775.358
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 775.358 784.393
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 784.393 798.534
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 798.534 809.328
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 809.328 820.462
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 820.462 829.017
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 842.141 850.582
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 861.677 867.582
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 867.582 888.830
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 888.830 896.232
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 900.075 909.620
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 909.620 920.433
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 920.433 934.204
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 934.204 950.207
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 950.207 961.708
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 961.708 970.456
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 970.456 977.025
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 977.025 986.414
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 986.414 993.227
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 993.227 998.462
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 998.462 1016.931
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1016.931 1019.422
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1019.422 1026.000
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1026.000 1034.860
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1034.860 1041.111
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1041.111 1047.782
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1047.782 1058.523
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1058.523 1064.417
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1064.417 1073.178
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1073.178 1082.940
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1082.940 1089.033
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1089.033 1097.488
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1097.488 1100.904
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1113.768 1119.747
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1119.747 1122.895
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1122.895 1131.545
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1131.545 1136.447
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1142.001 1155.917
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1155.917 1174.762
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1174.762 1187.288
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1187.288 1197.992
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1206.264 1208.347
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1208.347 1212.540
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1212.540 1218.175
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1218.175 1231.904
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1251.072 1258.989
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1258.989 1264.612
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1264.612 1271.258
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1277.118 1290.228
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1298.668 1304.963
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1304.963 1319.471
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1319.471 1336.043
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1336.043 1343.185
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1343.185 1351.127
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1351.127 1357.886
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1357.886 1367.701
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1375.738 1383.423
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1383.423 1395.263
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1395.263 1397.972
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1397.972 1399.414
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1399.414 1400.936
+CCTV2_ECON30MIN_CMN_20080420_213502 234.355 237.652
+CCTV2_ECON30MIN_CMN_20080420_213502 253.229 262.635
+CCTV2_ECON30MIN_CMN_20080420_213502 295.066 302.269
+CCTV2_ECON30MIN_CMN_20080420_213502 302.269 312.255
+CCTV2_ECON30MIN_CMN_20080420_213502 312.255 315.359
+CCTV2_ECON30MIN_CMN_20080420_213502 315.359 318.891
+CCTV2_ECON30MIN_CMN_20080420_213502 318.891 327.688
+CCTV2_ECON30MIN_CMN_20080420_213502 327.688 333.687
+CCTV2_ECON30MIN_CMN_20080420_213502 333.687 345.022
+CCTV2_ECON30MIN_CMN_20080420_213502 345.022 351.772
+CCTV2_ECON30MIN_CMN_20080420_213502 351.772 357.444
+CCTV2_ECON30MIN_CMN_20080420_213502 357.444 363.975
+CCTV2_ECON30MIN_CMN_20080420_213502 363.975 374.241
+CCTV2_ECON30MIN_CMN_20080420_213502 374.241 380.538
+CCTV2_ECON30MIN_CMN_20080420_213502 380.538 390.917
+CCTV2_ECON30MIN_CMN_20080420_213502 390.917 399.729
+CCTV2_ECON30MIN_CMN_20080420_213502 399.729 413.464
+CCTV2_ECON30MIN_CMN_20080420_213502 440.591 443.950
+CCTV2_ECON30MIN_CMN_20080420_213502 443.950 450.341
+CCTV2_ECON30MIN_CMN_20080420_213502 450.341 460.060
+CCTV2_ECON30MIN_CMN_20080420_213502 466.707 473.692
+CCTV2_ECON30MIN_CMN_20080420_213502 481.238 490.567
+CCTV2_ECON30MIN_CMN_20080420_213502 490.567 499.833
+CCTV2_ECON30MIN_CMN_20080420_213502 499.833 509.130
+CCTV2_ECON30MIN_CMN_20080420_213502 509.130 512.140
+CCTV2_ECON30MIN_CMN_20080420_213502 512.140 513.203
+CCTV2_ECON30MIN_CMN_20080420_213502 513.203 525.047
+CCTV2_ECON30MIN_CMN_20080420_213502 525.047 534.235
+CCTV2_ECON30MIN_CMN_20080420_213502 534.235 545.579
+CCTV2_ECON30MIN_CMN_20080420_213502 545.579 553.836
+CCTV2_ECON30MIN_CMN_20080420_213502 678.405 679.968
+CCTV2_ECON30MIN_CMN_20080420_213502 679.968 685.609
+CCTV2_ECON30MIN_CMN_20080420_213502 693.842 699.920
+CCTV2_ECON30MIN_CMN_20080420_213502 706.217 713.801
+CCTV2_ECON30MIN_CMN_20080420_213502 714.879 716.160
+CCTV2_ECON30MIN_CMN_20080420_213502 716.160 716.738
+CCTV2_ECON30MIN_CMN_20080420_213502 718.644 719.535
+CCTV2_ECON30MIN_CMN_20080420_213502 738.755 741.364
+CCTV2_ECON30MIN_CMN_20080420_213502 741.364 743.239
+CCTV2_ECON30MIN_CMN_20080420_213502 743.239 751.224
+CCTV2_ECON30MIN_CMN_20080420_213502 760.460 765.007
+CCTV2_ECON30MIN_CMN_20080420_213502 765.007 770.960
+CCTV2_ECON30MIN_CMN_20080420_213502 784.289 794.633
+CCTV2_ECON30MIN_CMN_20080420_213502 794.633 798.930
+CCTV2_ECON30MIN_CMN_20080420_213502 798.930 806.088
+CCTV2_ECON30MIN_CMN_20080420_213502 806.088 808.682
+CCTV2_ECON30MIN_CMN_20080420_213502 808.682 815.813
+CCTV2_ECON30MIN_CMN_20080420_213502 815.813 820.000
+CCTV2_ECON30MIN_CMN_20080420_213502 820.000 828.171
+CCTV2_ECON30MIN_CMN_20080420_213502 828.171 834.726
+CCTV2_ECON30MIN_CMN_20080420_213502 834.726 837.633
+CCTV2_ECON30MIN_CMN_20080420_213502 837.633 842.242
+CCTV2_ECON30MIN_CMN_20080420_213502 851.836 869.664
+CCTV2_ECON30MIN_CMN_20080420_213502 875.867 888.602
+CCTV2_ECON30MIN_CMN_20080420_213502 888.602 895.439
+CCTV2_ECON30MIN_CMN_20080420_213502 895.439 900.517
+CCTV2_ECON30MIN_CMN_20080420_213502 900.517 914.238
+CCTV2_ECON30MIN_CMN_20080420_213502 914.238 918.551
+CCTV2_ECON30MIN_CMN_20080420_213502 914.238 918.551
+CCTV2_ECON30MIN_CMN_20080420_213502 924.645 930.004
+CCTV2_ECON30MIN_CMN_20080420_213502 951.003 970.987
+CCTV2_ECON30MIN_CMN_20080420_213502 1003.269 1013.660
+CCTV2_ECON30MIN_CMN_20080420_213502 1020.583 1024.286
+CCTV2_ECON30MIN_CMN_20080420_213502 1020.583 1024.286
+CCTV2_ECON30MIN_CMN_20080420_213502 1024.286 1039.083
+CCTV2_ECON30MIN_CMN_20080420_213502 1039.083 1045.585
+CCTV2_ECON30MIN_CMN_20080420_213502 1039.083 1045.585
+CCTV2_ECON30MIN_CMN_20080420_213502 1049.677 1052.021
+CCTV2_ECON30MIN_CMN_20080420_213502 1052.021 1052.505
+CCTV2_ECON30MIN_CMN_20080420_213502 1052.505 1053.037
+CCTV2_ECON30MIN_CMN_20080420_213502 1053.037 1058.031
+CCTV2_ECON30MIN_CMN_20080420_213502 1067.374 1072.187
+CCTV2_ECON30MIN_CMN_20080420_213502 1072.187 1074.999
+CCTV2_ECON30MIN_CMN_20080420_213502 1074.999 1075.670
+CCTV2_ECON30MIN_CMN_20080420_213502 1075.670 1079.870
+CCTV2_ECON30MIN_CMN_20080420_213502 1075.670 1079.870
+CCTV2_ECON30MIN_CMN_20080420_213502 1079.870 1082.073
+CCTV2_ECON30MIN_CMN_20080420_213502 1082.073 1085.730
+CCTV2_ECON30MIN_CMN_20080420_213502 1082.073 1085.730
+CCTV2_ECON30MIN_CMN_20080420_213502 1088.996 1094.089
+CCTV2_ECON30MIN_CMN_20080420_213502 1094.089 1102.260
+CCTV2_ECON30MIN_CMN_20080420_213502 1102.260 1112.476
+CCTV2_ECON30MIN_CMN_20080420_213502 1123.991 1127.070
+CCTV2_ECON30MIN_CMN_20080420_213502 1141.019 1152.161
+CCTV2_ECON30MIN_CMN_20080420_213502 1178.428 1191.506
+CCTV2_ECON30MIN_CMN_20080420_213502 1283.084 1284.287
+CCTV2_ECON30MIN_CMN_20080420_213502 1301.895 1308.302
+CCTV2_ECON30MIN_CMN_20080420_213502 1308.302 1313.927
+CCTV2_ECON30MIN_CMN_20080420_213502 1313.927 1319.068
+CCTV2_ECON30MIN_CMN_20080420_213502 1319.068 1330.584
+CCTV2_ECON30MIN_CMN_20080420_213502 1330.584 1338.693
+CCTV2_ECON30MIN_CMN_20080420_213502 1338.693 1346.021
+CCTV2_ECON30MIN_CMN_20080420_213502 1346.021 1356.834
+CCTV2_ECON30MIN_CMN_20080420_213502 1371.297 1375.642
+CCTV2_ECON30MIN_CMN_20080420_213502 1375.642 1380.486
+CCTV2_ECON30MIN_CMN_20080420_213502 1404.392 1412.094
+CCTV2_ECON30MIN_CMN_20080420_213502 1412.094 1423.359
+CCTV2_ECON30MIN_CMN_20080420_213502 1423.359 1432.343
+CCTV2_ECON30MIN_CMN_20080420_213502 1439.640 1460.031
+CCTV2_ECON30MIN_CMN_20080420_213502 1460.031 1468.906
+CCTV2_ECON30MIN_CMN_20080420_213502 1483.329 1491.032
+CCTV2_ECON30MIN_CMN_20080420_213502 1501.140 1518.047
+CCTV2_ECON30MIN_CMN_20080420_213502 1518.047 1529.531
+CCTV2_ECON30MIN_CMN_20080420_213502 1529.531 1555.125
+CCTV2_ECON30MIN_CMN_20080420_213502 1555.125 1559.343
+CCTV2_ECON30MIN_CMN_20080420_213502 1559.343 1562.843
+CCTV2_ECON30MIN_CMN_20080420_213502 1590.906 1603.857
+CCTV2_ECON30MIN_CMN_20080420_213502 1613.122 1620.508
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 29.638 41.174
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 61.174 74.985
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 74.985 80.069
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 112.640 135.703
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 135.703 150.481
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 150.481 161.646
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 161.646 174.035
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 174.035 185.040
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 185.040 193.175
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 193.175 208.438
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 222.324 234.931
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 234.931 261.970
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 261.970 269.748
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 269.748 295.797
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 309.989 316.211
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 362.786 370.488
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 370.488 386.259
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 386.259 405.897
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 429.675 455.188
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 455.188 464.853
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 489.754 515.497
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 545.652 560.815
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 560.815 570.212
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 570.212 580.775
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 580.775 598.261
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 598.261 608.680
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 656.132 664.402
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 664.402 674.431
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 695.381 704.483
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 723.729 736.418
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 759.826 777.547
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 777.547 795.385
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 843.623 853.577
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 874.685 886.505
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 886.505 896.745
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 896.745 910.092
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 910.092 925.252
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 925.252 938.143
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 938.143 951.745
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 951.745 964.943
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1019.141 1032.339
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1032.339 1034.788
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1034.788 1054.070
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1054.070 1080.835
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1141.402 1158.671
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1158.671 1173.639
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1195.100 1198.573
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1236.256 1249.507
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1260.542 1271.225
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1304.935 1307.384
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1379.623 1385.280
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1379.623 1385.280
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 24.820 31.664
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 31.664 41.447
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 41.447 49.400
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 49.400 55.977
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 59.165 61.150
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 61.150 69.807
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 69.807 74.543
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 74.543 81.496
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 81.496 91.651
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 91.651 100.649
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 106.211 109.954
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 109.954 128.138
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 128.138 138.529
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 138.529 144.545
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 144.545 154.059
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 164.200 173.216
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 173.216 178.609
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 178.609 188.421
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 203.187 210.031
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 215.388 219.389
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 219.389 225.622
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 231.748 239.497
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 239.497 244.855
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 244.855 259.219
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 286.970 292.313
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 292.313 304.779
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 304.779 316.361
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 334.019 341.924
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 346.205 351.517
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 351.517 362.363
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 362.363 375.754
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 375.754 393.754
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 393.754 401.285
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 401.285 407.723
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 407.723 418.677
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 425.287 432.099
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 432.099 439.629
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 448.833 456.051
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 468.770 477.646
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 477.646 484.442
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 484.442 496.784
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 512.971 518.628
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 518.628 533.084
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 533.084 541.646
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 541.646 554.100
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 567.085 571.460
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 571.460 577.288
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 577.288 584.523
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 584.523 598.348
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 598.348 607.426
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 616.401 627.699
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 627.699 639.104
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 639.104 644.307
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 653.398 658.383
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 658.383 669.929
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 669.929 680.272
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 691.523 699.460
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 710.741 725.602
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 725.602 743.573
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 743.573 752.479
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 752.479 759.448
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 759.448 763.341
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 763.341 777.146
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 777.146 786.369
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 786.369 793.150
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 793.150 802.042
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 802.042 810.840
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 810.840 828.825
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 828.825 844.043
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 844.043 857.419
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 857.419 869.544
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 869.544 879.935
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 891.044 896.794
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 896.794 901.638
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 918.416 924.494
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 924.494 931.103
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 931.103 947.803
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 960.943 965.792
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 965.792 970.370
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 970.370 977.778
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 977.778 986.621
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 986.621 993.606
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 993.606 995.294
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 995.294 1004.564
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1004.564 1012.970
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1044.924 1056.846
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1056.846 1065.345
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1065.345 1077.720
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1077.720 1084.360
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1084.360 1097.843
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1097.843 1100.546
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1100.546 1111.858
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1123.731 1127.075
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1127.075 1130.165
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1130.165 1139.275
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1139.275 1146.377
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1146.377 1153.737
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1153.737 1166.565
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1166.565 1174.330
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1174.330 1183.486
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1183.486 1194.515
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1194.515 1206.375
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1221.953 1234.161
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1234.161 1245.160
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1245.160 1247.482
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1247.482 1252.655
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1252.655 1258.607
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1258.607 1270.308
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1270.308 1273.192
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1273.192 1275.348
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1275.348 1281.520
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1281.520 1287.723
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1287.723 1293.458
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1293.458 1301.148
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1301.148 1304.054
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1309.820 1313.960
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1313.960 1319.772
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1319.772 1325.506
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1336.008 1342.774
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1352.726 1355.571
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1355.571 1357.634
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1357.634 1358.399
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 229.698 233.901
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 233.901 236.698
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 236.698 241.120
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 241.120 253.027
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 259.840 272.480
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 272.480 279.824
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 279.824 290.626
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 290.626 293.516
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 293.516 298.189
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 298.189 303.830
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 303.830 309.580
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 309.580 324.471
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 324.471 333.659
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 333.659 338.299
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 338.299 344.471
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 352.721 360.597
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 360.597 367.222
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 367.222 387.378
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 387.378 394.925
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 394.925 408.081
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 408.081 417.862
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 417.862 428.112
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 428.112 435.549
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 446.502 460.080
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 460.080 467.664
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 467.664 473.774
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 482.742 490.898
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 490.898 501.641
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 501.641 507.602
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 521.617 529.083
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 529.083 532.910
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 569.628 576.175
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 576.175 578.535
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 587.816 592.222
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 592.222 597.658
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 597.658 609.347
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 609.347 612.191
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 612.191 622.551
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 622.551 638.989
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 638.989 655.614
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 655.614 663.083
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 663.083 679.717
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 679.717 687.311
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 687.311 699.060
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 699.060 711.888
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 711.888 721.310
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 721.310 731.825
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 731.825 735.887
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 735.887 747.120
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 747.120 757.760
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 767.323 770.141
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 770.141 772.751
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 772.751 779.206
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 779.206 791.472
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 791.472 795.878
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 795.878 804.477
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 809.214 816.348
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 816.348 826.409
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 826.409 834.424
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 834.424 858.586
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 864.811 868.545
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 868.545 876.872
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 876.872 887.715
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 887.715 890.324
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 890.324 897.903
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 915.543 922.574
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 928.370 939.182
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 939.182 948.620
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 948.620 952.651
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 952.651 959.635
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 959.635 972.447
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 972.447 979.823
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 979.823 992.214
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 999.647 1004.788
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1004.788 1011.929
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1011.929 1021.773
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1021.773 1030.931
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1030.931 1037.574
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1037.574 1042.027
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1042.027 1049.402
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1049.402 1065.739
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1073.693 1079.005
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1079.005 1083.989
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1083.989 1094.707
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1099.969 1102.843
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1102.843 1117.965
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1117.965 1120.372
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1120.372 1124.121
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1124.121 1134.073
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1141.483 1149.718
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1149.718 1151.546
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1157.608 1166.676
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1166.676 1172.729
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1172.729 1181.213
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1181.213 1190.837
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1190.837 1196.618
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1196.618 1206.540
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1206.540 1225.071
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1237.382 1246.320
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1246.320 1258.509
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1258.509 1272.603
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1272.603 1278.286
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1295.029 1298.826
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1298.826 1305.873
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1326.209 1332.709
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1332.709 1344.586
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1344.586 1364.199
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1364.199 1374.852
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1374.852 1382.713
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1382.713 1388.166
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1388.166 1399.198
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1399.198 1405.495
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1419.548 1424.094
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1424.094 1430.015
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1437.439 1456.752
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1456.752 1469.403
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1469.403 1474.446
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1485.195 1488.335
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1488.335 1490.304
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1490.304 1502.615
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1502.615 1506.209
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1524.692 1530.378
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1591.894 1600.097
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1603.821 1621.586
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1621.586 1636.617
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1636.617 1644.055
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1644.055 1649.180
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1656.711 1672.165
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1672.165 1683.040
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1683.040 1693.837
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1693.837 1700.096
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1722.760 1736.510
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1747.322 1767.208
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1770.834 1783.287
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1783.287 1791.913
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1806.014 1809.387
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1818.301 1820.847
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1820.847 1827.761
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1827.761 1831.870
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1835.588 1845.681
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1845.681 1858.778
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1864.946 1872.040
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1872.040 1883.883
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1883.883 1891.540
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1914.196 1921.415
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1921.415 1925.306
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1949.601 1957.945
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1957.945 1965.928
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1965.928 1978.022
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1978.022 1983.444
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1983.444 1988.616
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1988.616 1998.663
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 2005.082 2011.787
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 2011.787 2014.521
+CCTV2_ECON30MIN_CMN_20080429_213502 19.671 28.109
+CCTV2_ECON30MIN_CMN_20080429_213502 32.374 39.546
+CCTV2_ECON30MIN_CMN_20080429_213502 43.156 46.593
+CCTV2_ECON30MIN_CMN_20080429_213502 214.123 217.170
+CCTV2_ECON30MIN_CMN_20080429_213502 247.155 255.302
+CCTV2_ECON30MIN_CMN_20080429_213502 255.302 257.568
+CCTV2_ECON30MIN_CMN_20080429_213502 257.568 270.660
+CCTV2_ECON30MIN_CMN_20080429_213502 278.498 288.025
+CCTV2_ECON30MIN_CMN_20080429_213502 294.276 300.214
+CCTV2_ECON30MIN_CMN_20080429_213502 300.214 305.404
+CCTV2_ECON30MIN_CMN_20080429_213502 305.404 312.544
+CCTV2_ECON30MIN_CMN_20080429_213502 312.544 316.873
+CCTV2_ECON30MIN_CMN_20080429_213502 316.873 323.029
+CCTV2_ECON30MIN_CMN_20080429_213502 329.513 334.809
+CCTV2_ECON30MIN_CMN_20080429_213502 334.809 342.387
+CCTV2_ECON30MIN_CMN_20080429_213502 342.387 348.278
+CCTV2_ECON30MIN_CMN_20080429_213502 368.839 377.885
+CCTV2_ECON30MIN_CMN_20080429_213502 394.995 400.071
+CCTV2_ECON30MIN_CMN_20080429_213502 400.071 407.493
+CCTV2_ECON30MIN_CMN_20080429_213502 407.493 409.946
+CCTV2_ECON30MIN_CMN_20080429_213502 409.946 424.618
+CCTV2_ECON30MIN_CMN_20080429_213502 424.618 431.478
+CCTV2_ECON30MIN_CMN_20080429_213502 431.478 435.525
+CCTV2_ECON30MIN_CMN_20080429_213502 435.525 436.353
+CCTV2_ECON30MIN_CMN_20080429_213502 436.353 438.994
+CCTV2_ECON30MIN_CMN_20080429_213502 438.994 442.838
+CCTV2_ECON30MIN_CMN_20080429_213502 442.838 452.707
+CCTV2_ECON30MIN_CMN_20080429_213502 452.707 458.924
+CCTV2_ECON30MIN_CMN_20080429_213502 474.127 479.940
+CCTV2_ECON30MIN_CMN_20080429_213502 484.533 491.939
+CCTV2_ECON30MIN_CMN_20080429_213502 491.939 502.158
+CCTV2_ECON30MIN_CMN_20080429_213502 502.158 510.080
+CCTV2_ECON30MIN_CMN_20080429_213502 510.080 515.580
+CCTV2_ECON30MIN_CMN_20080429_213502 515.580 522.471
+CCTV2_ECON30MIN_CMN_20080429_213502 522.471 532.784
+CCTV2_ECON30MIN_CMN_20080429_213502 532.784 550.785
+CCTV2_ECON30MIN_CMN_20080429_213502 558.926 561.187
+CCTV2_ECON30MIN_CMN_20080429_213502 568.074 569.731
+CCTV2_ECON30MIN_CMN_20080429_213502 719.770 721.099
+CCTV2_ECON30MIN_CMN_20080429_213502 730.082 744.393
+CCTV2_ECON30MIN_CMN_20080429_213502 751.236 767.189
+CCTV2_ECON30MIN_CMN_20080429_213502 767.189 769.470
+CCTV2_ECON30MIN_CMN_20080429_213502 769.470 770.048
+CCTV2_ECON30MIN_CMN_20080429_213502 770.048 771.798
+CCTV2_ECON30MIN_CMN_20080429_213502 772.360 773.766
+CCTV2_ECON30MIN_CMN_20080429_213502 773.766 775.781
+CCTV2_ECON30MIN_CMN_20080429_213502 773.766 775.781
+CCTV2_ECON30MIN_CMN_20080429_213502 775.781 776.844
+CCTV2_ECON30MIN_CMN_20080429_213502 775.781 776.844
+CCTV2_ECON30MIN_CMN_20080429_213502 787.500 792.624
+CCTV2_ECON30MIN_CMN_20080429_213502 792.624 796.859
+CCTV2_ECON30MIN_CMN_20080429_213502 803.247 808.903
+CCTV2_ECON30MIN_CMN_20080429_213502 830.185 833.748
+CCTV2_ECON30MIN_CMN_20080429_213502 833.748 841.405
+CCTV2_ECON30MIN_CMN_20080429_213502 841.405 847.702
+CCTV2_ECON30MIN_CMN_20080429_213502 847.702 852.170
+CCTV2_ECON30MIN_CMN_20080429_213502 852.170 861.421
+CCTV2_ECON30MIN_CMN_20080429_213502 861.421 873.507
+CCTV2_ECON30MIN_CMN_20080429_213502 880.819 888.139
+CCTV2_ECON30MIN_CMN_20080429_213502 888.139 895.515
+CCTV2_ECON30MIN_CMN_20080429_213502 895.515 902.406
+CCTV2_ECON30MIN_CMN_20080429_213502 919.296 925.437
+CCTV2_ECON30MIN_CMN_20080429_213502 925.437 932.267
+CCTV2_ECON30MIN_CMN_20080429_213502 932.267 939.831
+CCTV2_ECON30MIN_CMN_20080429_213502 939.831 950.347
+CCTV2_ECON30MIN_CMN_20080429_213502 950.347 962.268
+CCTV2_ECON30MIN_CMN_20080429_213502 962.268 972.596
+CCTV2_ECON30MIN_CMN_20080429_213502 984.611 988.248
+CCTV2_ECON30MIN_CMN_20080429_213502 988.248 1008.414
+CCTV2_ECON30MIN_CMN_20080429_213502 1036.446 1042.134
+CCTV2_ECON30MIN_CMN_20080429_213502 1042.134 1049.055
+CCTV2_ECON30MIN_CMN_20080429_213502 1049.055 1064.962
+CCTV2_ECON30MIN_CMN_20080429_213502 1064.962 1074.275
+CCTV2_ECON30MIN_CMN_20080429_213502 1074.275 1081.603
+CCTV2_ECON30MIN_CMN_20080429_213502 1081.603 1084.759
+CCTV2_ECON30MIN_CMN_20080429_213502 1084.759 1092.087
+CCTV2_ECON30MIN_CMN_20080429_213502 1092.087 1098.899
+CCTV2_ECON30MIN_CMN_20080429_213502 1098.899 1099.524
+CCTV2_ECON30MIN_CMN_20080429_213502 1099.524 1103.743
+CCTV2_ECON30MIN_CMN_20080429_213502 1109.039 1112.492
+CCTV2_ECON30MIN_CMN_20080429_213502 1139.524 1155.108
+CCTV2_ECON30MIN_CMN_20080429_213502 1155.108 1161.890
+CCTV2_ECON30MIN_CMN_20080429_213502 1178.277 1183.918
+CCTV2_ECON30MIN_CMN_20080429_213502 1183.918 1189.671
+CCTV2_ECON30MIN_CMN_20080429_213502 1183.918 1189.671
+CCTV2_ECON30MIN_CMN_20080429_213502 1189.671 1202.827
+CCTV2_ECON30MIN_CMN_20080429_213502 1202.827 1206.671
+CCTV2_ECON30MIN_CMN_20080429_213502 1225.414 1233.134
+CCTV2_ECON30MIN_CMN_20080429_213502 1243.461 1267.804
+CCTV2_ECON30MIN_CMN_20080429_213502 1267.804 1270.663
+CCTV2_ECON30MIN_CMN_20080429_213502 1427.799 1429.627
+CCTV2_ECON30MIN_CMN_20080429_213502 1429.627 1435.033
+CCTV2_ECON30MIN_CMN_20080429_213502 1435.033 1456.200
+CCTV2_ECON30MIN_CMN_20080429_213502 1456.200 1467.698
+CCTV2_ECON30MIN_CMN_20080429_213502 1475.542 1486.964
+CCTV2_ECON30MIN_CMN_20080429_213502 1486.964 1493.449
+CCTV2_ECON30MIN_CMN_20080429_213502 1504.028 1519.482
+CCTV2_ECON30MIN_CMN_20080429_213502 1519.482 1532.405
+CCTV2_ECON30MIN_CMN_20080429_213502 1532.405 1534.343
+CCTV2_ECON30MIN_CMN_20080429_213502 1553.046 1560.554
+CCTV2_ECON30MIN_CMN_20080429_213502 1600.757 1610.648
+CCTV2_ECON30MIN_CMN_20080429_213502 1610.648 1615.944
+CCTV2_ECON30MIN_CMN_20080429_213502 1615.944 1620.664
+CCTV1_30MINNEWS_CMN_20080331_115901 73.617 74.726
+CCTV1_30MINNEWS_CMN_20080331_115901 80.039 81.273
+CCTV1_30MINNEWS_CMN_20080331_115901 81.273 83.273
+CCTV1_30MINNEWS_CMN_20080331_115901 83.273 91.837
+CCTV1_30MINNEWS_CMN_20080331_115901 91.837 104.369
+CCTV1_30MINNEWS_CMN_20080331_115901 104.369 115.744
+CCTV1_30MINNEWS_CMN_20080331_115901 119.213 129.463
+CCTV1_30MINNEWS_CMN_20080331_115901 160.767 172.268
+CCTV1_30MINNEWS_CMN_20080331_115901 172.268 180.705
+CCTV1_30MINNEWS_CMN_20080331_115901 241.745 256.432
+CCTV1_30MINNEWS_CMN_20080331_115901 263.026 281.104
+CCTV1_30MINNEWS_CMN_20080331_115901 317.540 326.435
+CCTV1_30MINNEWS_CMN_20080331_115901 326.435 330.040
+CCTV1_30MINNEWS_CMN_20080331_115901 330.040 342.509
+CCTV1_30MINNEWS_CMN_20080331_115901 358.868 368.258
+CCTV1_30MINNEWS_CMN_20080331_115901 368.258 374.914
+CCTV1_30MINNEWS_CMN_20080331_115901 374.914 379.211
+CCTV1_30MINNEWS_CMN_20080331_115901 379.211 385.023
+CCTV1_30MINNEWS_CMN_20080331_115901 385.023 393.261
+CCTV1_30MINNEWS_CMN_20080331_115901 393.261 399.898
+CCTV1_30MINNEWS_CMN_20080331_115901 399.898 407.117
+CCTV1_30MINNEWS_CMN_20080331_115901 407.117 411.743
+CCTV1_30MINNEWS_CMN_20080331_115901 411.743 418.165
+CCTV1_30MINNEWS_CMN_20080331_115901 431.499 437.077
+CCTV1_30MINNEWS_CMN_20080331_115901 459.808 464.745
+CCTV1_30MINNEWS_CMN_20080331_115901 489.702 498.905
+CCTV1_30MINNEWS_CMN_20080331_115901 505.249 523.030
+CCTV1_30MINNEWS_CMN_20080331_115901 523.030 525.812
+CCTV1_30MINNEWS_CMN_20080331_115901 525.812 529.171
+CCTV1_30MINNEWS_CMN_20080331_115901 529.171 536.467
+CCTV1_30MINNEWS_CMN_20080331_115901 544.186 547.390
+CCTV1_30MINNEWS_CMN_20080331_115901 547.390 552.718
+CCTV1_30MINNEWS_CMN_20080331_115901 559.780 562.702
+CCTV1_30MINNEWS_CMN_20080331_115901 562.702 571.076
+CCTV1_30MINNEWS_CMN_20080331_115901 579.545 581.358
+CCTV1_30MINNEWS_CMN_20080331_115901 595.700 602.029
+CCTV1_30MINNEWS_CMN_20080331_115901 602.029 622.670
+CCTV1_30MINNEWS_CMN_20080331_115901 622.670 626.592
+CCTV1_30MINNEWS_CMN_20080331_115901 626.592 642.310
+CCTV1_30MINNEWS_CMN_20080331_115901 642.310 647.277
+CCTV1_30MINNEWS_CMN_20080331_115901 657.824 672.325
+CCTV1_30MINNEWS_CMN_20080331_115901 672.325 683.684
+CCTV1_30MINNEWS_CMN_20080331_115901 691.637 704.684
+CCTV1_30MINNEWS_CMN_20080331_115901 704.684 716.388
+CCTV1_30MINNEWS_CMN_20080331_115901 727.200 734.200
+CCTV1_30MINNEWS_CMN_20080331_115901 734.200 739.404
+CCTV1_30MINNEWS_CMN_20080331_115901 739.404 753.747
+CCTV1_30MINNEWS_CMN_20080331_115901 753.747 769.246
+CCTV1_30MINNEWS_CMN_20080331_115901 769.246 776.356
+CCTV1_30MINNEWS_CMN_20080331_115901 776.356 782.591
+CCTV1_30MINNEWS_CMN_20080331_115901 782.591 792.498
+CCTV1_30MINNEWS_CMN_20080331_115901 798.516 808.045
+CCTV1_30MINNEWS_CMN_20080331_115901 808.045 817.280
+CCTV1_30MINNEWS_CMN_20080331_115901 821.077 826.984
+CCTV1_30MINNEWS_CMN_20080331_115901 826.984 833.734
+CCTV1_30MINNEWS_CMN_20080331_115901 833.734 851.468
+CCTV1_30MINNEWS_CMN_20080331_115901 851.468 862.796
+CCTV1_30MINNEWS_CMN_20080331_115901 862.796 866.515
+CCTV1_30MINNEWS_CMN_20080331_115901 873.765 881.358
+CCTV1_30MINNEWS_CMN_20080331_115901 881.358 886.076
+CCTV1_30MINNEWS_CMN_20080331_115901 901.453 909.078
+CCTV1_30MINNEWS_CMN_20080331_115901 909.078 914.546
+CCTV1_30MINNEWS_CMN_20080331_115901 914.546 928.921
+CCTV1_30MINNEWS_CMN_20080331_115901 928.921 932.890
+CCTV1_30MINNEWS_CMN_20080331_115901 932.890 944.984
+CCTV1_30MINNEWS_CMN_20080331_115901 944.984 955.482
+CCTV1_30MINNEWS_CMN_20080331_115901 955.482 963.498
+CCTV1_30MINNEWS_CMN_20080331_115901 963.498 967.934
+CCTV1_30MINNEWS_CMN_20080331_115901 981.646 989.476
+CCTV1_30MINNEWS_CMN_20080331_115901 989.476 994.055
+CCTV1_30MINNEWS_CMN_20080331_115901 994.055 999.685
+CCTV1_30MINNEWS_CMN_20080331_115901 999.685 1002.986
+CCTV1_30MINNEWS_CMN_20080331_115901 1002.986 1007.358
+CCTV1_30MINNEWS_CMN_20080331_115901 1022.343 1024.796
+CCTV1_30MINNEWS_CMN_20080331_115901 1024.796 1033.076
+CCTV1_30MINNEWS_CMN_20080331_115901 1033.076 1038.904
+CCTV1_30MINNEWS_CMN_20080331_115901 1170.476 1172.413
+CCTV1_30MINNEWS_CMN_20080331_115901 1172.413 1185.274
+CCTV1_30MINNEWS_CMN_20080331_115901 1185.274 1197.461
+CCTV1_30MINNEWS_CMN_20080331_115901 1197.461 1199.852
+CCTV1_30MINNEWS_CMN_20080331_115901 1199.852 1211.524
+CCTV1_30MINNEWS_CMN_20080331_115901 1211.524 1216.060
+CCTV1_30MINNEWS_CMN_20080331_115901 1216.060 1224.138
+CCTV1_30MINNEWS_CMN_20080331_115901 1224.138 1229.075
+CCTV1_30MINNEWS_CMN_20080331_115901 1238.449 1247.293
+CCTV1_30MINNEWS_CMN_20080331_115901 1247.293 1252.653
+CCTV1_30MINNEWS_CMN_20080331_115901 1252.653 1255.934
+CCTV1_30MINNEWS_CMN_20080331_115901 1255.934 1258.209
+CCTV1_30MINNEWS_CMN_20080331_115901 1258.209 1260.319
+CCTV1_30MINNEWS_CMN_20080331_115901 1260.319 1262.084
+CCTV1_30MINNEWS_CMN_20080331_115901 1262.084 1263.787
+CCTV1_30MINNEWS_CMN_20080331_115901 1263.787 1265.037
+CCTV1_30MINNEWS_CMN_20080331_115901 1265.037 1266.787
+CCTV1_30MINNEWS_CMN_20080331_115901 1266.787 1270.902
+CCTV1_30MINNEWS_CMN_20080331_115901 1270.902 1274.011
+CCTV1_30MINNEWS_CMN_20080331_115901 1274.011 1286.870
+CCTV1_30MINNEWS_CMN_20080331_115901 1286.870 1288.917
+CCTV1_30MINNEWS_CMN_20080331_115901 1288.917 1289.433
+CCTV1_30MINNEWS_CMN_20080331_115901 1289.433 1292.465
+CCTV1_30MINNEWS_CMN_20080331_115901 1292.465 1294.265
+CCTV1_30MINNEWS_CMN_20080331_115901 1294.265 1295.437
+CCTV1_30MINNEWS_CMN_20080331_115901 1295.437 1296.327
+CCTV1_30MINNEWS_CMN_20080331_115901 1296.327 1297.296
+CCTV1_30MINNEWS_CMN_20080331_115901 1297.296 1299.422
+CCTV1_30MINNEWS_CMN_20080331_115901 1299.422 1311.845
+CCTV1_30MINNEWS_CMN_20080331_115901 1311.845 1315.578
+CCTV1_30MINNEWS_CMN_20080331_115901 1315.578 1319.797
+CCTV1_30MINNEWS_CMN_20080331_115901 1319.797 1320.765
+CCTV1_30MINNEWS_CMN_20080331_115901 1319.797 1320.765
+CCTV1_30MINNEWS_CMN_20080331_115901 1320.765 1332.406
+CCTV1_30MINNEWS_CMN_20080331_115901 1320.765 1332.406
+CCTV1_30MINNEWS_CMN_20080331_115901 1332.406 1337.515
+CCTV1_30MINNEWS_CMN_20080331_115901 1337.515 1341.140
+CCTV1_30MINNEWS_CMN_20080331_115901 1343.702 1345.577
+CCTV1_30MINNEWS_CMN_20080331_115901 1345.577 1348.954
+CCTV1_30MINNEWS_CMN_20080331_115901 1351.329 1353.640
+CCTV1_30MINNEWS_CMN_20080331_115901 1353.640 1355.233
+CCTV1_30MINNEWS_CMN_20080331_115901 1355.233 1368.092
+CCTV1_30MINNEWS_CMN_20080331_115901 1368.092 1375.092
+CCTV1_30MINNEWS_CMN_20080331_115901 1375.092 1383.467
+CCTV1_30MINNEWS_CMN_20080331_115901 1393.358 1397.264
+CCTV1_30MINNEWS_CMN_20080331_115901 1397.264 1400.952
+CCTV1_30MINNEWS_CMN_20080331_115901 1400.952 1404.608
+CCTV1_30MINNEWS_CMN_20080331_115901 1404.608 1407.062
+CCTV1_30MINNEWS_CMN_20080331_115901 1407.062 1417.687
+CCTV1_30MINNEWS_CMN_20080331_115901 1417.687 1428.921
+CCTV1_30MINNEWS_CMN_20080331_115901 1428.921 1443.156
+CCTV1_30MINNEWS_CMN_20080331_115901 1443.156 1461.169
+CCTV1_30MINNEWS_CMN_20080331_115901 1461.169 1474.654
+CCTV1_30MINNEWS_CMN_20080331_115901 1489.887 1498.260
+CCTV1_30MINNEWS_CMN_20080331_115901 1498.260 1504.838
+CCTV1_30MINNEWS_CMN_20080331_115901 1504.838 1519.034
+CCTV1_30MINNEWS_CMN_20080331_115901 1519.034 1535.248
+CCTV1_30MINNEWS_CMN_20080331_115901 1547.687 1549.578
+CCTV1_30MINNEWS_CMN_20080331_115901 1549.578 1554.985
+CCTV1_30MINNEWS_CMN_20080331_115901 1561.313 1566.219
+CCTV1_30MINNEWS_CMN_20080331_115901 1566.219 1570.172
+CCTV1_30MINNEWS_CMN_20080331_115901 1570.172 1581.719
+CCTV1_30MINNEWS_CMN_20080331_115901 1581.719 1588.672
+CCTV1_30MINNEWS_CMN_20080331_115901 1588.672 1601.765
+CCTV1_30MINNEWS_CMN_20080331_115901 1601.765 1608.937
+CCTV1_30MINNEWS_CMN_20080331_115901 1608.937 1616.234
+CCTV1_30MINNEWS_CMN_20080331_115901 1616.234 1625.828
+CCTV1_30MINNEWS_CMN_20080331_115901 1846.629 1848.832
+CCTV1_30MINNEWS_CMN_20080331_115901 1850.125 1850.781
+VOA_INTNLNEWS_CMN_20080405_210000 241.323 247.150
+VOA_INTNLNEWS_CMN_20080405_210000 247.150 251.744
+VOA_INTNLNEWS_CMN_20080405_210000 251.744 255.400
+VOA_INTNLNEWS_CMN_20080405_210000 255.400 257.431
+VOA_INTNLNEWS_CMN_20080405_210000 257.431 267.384
+VOA_INTNLNEWS_CMN_20080405_210000 278.525 287.353
+VOA_INTNLNEWS_CMN_20080405_210000 300.697 314.009
+VOA_INTNLNEWS_CMN_20080405_210000 314.009 324.435
+VOA_INTNLNEWS_CMN_20080405_210000 324.435 336.433
+VOA_INTNLNEWS_CMN_20080405_210000 345.128 356.003
+VOA_INTNLNEWS_CMN_20080405_210000 356.003 364.050
+VOA_INTNLNEWS_CMN_20080405_210000 388.740 394.638
+VOA_INTNLNEWS_CMN_20080405_210000 394.638 403.500
+VOA_INTNLNEWS_CMN_20080405_210000 410.141 417.594
+VOA_INTNLNEWS_CMN_20080405_210000 417.594 433.219
+VOA_INTNLNEWS_CMN_20080405_210000 433.219 444.904
+VOA_INTNLNEWS_CMN_20080405_210000 451.888 456.731
+VOA_INTNLNEWS_CMN_20080405_210000 456.731 465.545
+VOA_INTNLNEWS_CMN_20080405_210000 474.531 480.203
+VOA_INTNLNEWS_CMN_20080405_210000 480.203 492.173
+VOA_INTNLNEWS_CMN_20080405_210000 492.173 498.877
+VOA_INTNLNEWS_CMN_20080405_210000 508.627 515.190
+VOA_INTNLNEWS_CMN_20080405_210000 515.190 526.337
+VOA_INTNLNEWS_CMN_20080405_210000 533.728 537.589
+VOA_INTNLNEWS_CMN_20080405_210000 537.589 541.542
+VOA_INTNLNEWS_CMN_20080405_210000 541.542 546.667
+VOA_INTNLNEWS_CMN_20080405_210000 546.667 554.995
+VOA_INTNLNEWS_CMN_20080405_210000 554.995 561.245
+VOA_INTNLNEWS_CMN_20080405_210000 561.245 574.462
+VOA_INTNLNEWS_CMN_20080405_210000 574.462 580.541
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 194.217 196.091
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 224.759 232.647
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 232.647 236.478
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 236.478 246.296
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 246.296 257.193
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 257.193 263.645
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 306.465 318.667
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 334.963 345.449
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 345.449 357.893
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 357.893 366.346
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 374.234 381.156
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 381.156 394.735
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 394.735 403.516
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 403.516 407.391
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 407.391 414.399
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 429.306 434.618
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 434.618 442.602
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 442.602 452.473
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 452.473 456.395
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 456.395 467.000
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 476.077 481.061
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 494.779 501.368
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 501.368 511.505
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 511.505 513.373
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 513.373 515.929
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 515.929 517.534
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 515.929 517.534
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 520.732 537.795
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 520.732 537.795
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 556.955 563.067
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 572.161 591.108
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 591.108 601.233
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 605.483 614.831
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 614.831 617.626
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 617.626 618.724
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 618.724 634.905
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 634.905 641.994
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 641.994 648.898
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 648.898 658.165
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 658.165 663.817
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 663.817 672.755
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 692.757 708.900
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 708.900 716.876
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 716.876 722.007
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 716.876 722.007
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 722.007 736.251
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 722.007 736.251
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 753.188 759.204
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 759.204 769.884
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 769.884 779.588
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 779.588 790.806
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 790.806 808.312
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 858.909 877.177
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 877.177 880.099
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 880.099 886.446
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 886.446 894.568
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 902.255 913.428
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 918.992 933.258
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 938.398 941.008
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 941.008 948.540
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 948.540 952.509
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 984.493 994.446
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 994.446 1001.087
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1001.087 1007.149
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1013.260 1020.865
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1020.865 1026.865
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1026.865 1032.937
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1039.890 1050.215
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1059.761 1092.511
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1092.511 1102.448
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1102.448 1106.449
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1117.543 1127.884
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1142.950 1146.668
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1146.668 1161.809
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1161.809 1173.591
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1173.591 1178.669
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1178.669 1194.075
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1209.637 1211.934
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1211.934 1222.203
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1222.203 1234.531
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1242.343 1246.827
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1246.827 1255.561
+CCTV2_ECON30MIN_CMN_20080413_213502 208.213 211.369
+CCTV2_ECON30MIN_CMN_20080413_213502 211.369 218.374
+CCTV2_ECON30MIN_CMN_20080413_213502 233.129 242.004
+CCTV2_ECON30MIN_CMN_20080413_213502 242.004 246.754
+CCTV2_ECON30MIN_CMN_20080413_213502 246.754 264.056
+CCTV2_ECON30MIN_CMN_20080413_213502 273.068 288.756
+CCTV2_ECON30MIN_CMN_20080413_213502 298.741 312.650
+CCTV2_ECON30MIN_CMN_20080413_213502 312.650 326.290
+CCTV2_ECON30MIN_CMN_20080413_213502 326.290 336.873
+CCTV2_ECON30MIN_CMN_20080413_213502 346.589 355.948
+CCTV2_ECON30MIN_CMN_20080413_213502 362.400 371.540
+CCTV2_ECON30MIN_CMN_20080413_213502 371.540 386.955
+CCTV2_ECON30MIN_CMN_20080413_213502 405.161 421.808
+CCTV2_ECON30MIN_CMN_20080413_213502 421.808 430.292
+CCTV2_ECON30MIN_CMN_20080413_213502 660.711 672.368
+CCTV2_ECON30MIN_CMN_20080413_213502 672.368 682.375
+CCTV2_ECON30MIN_CMN_20080413_213502 682.375 688.956
+CCTV2_ECON30MIN_CMN_20080413_213502 688.956 698.549
+CCTV2_ECON30MIN_CMN_20080413_213502 720.636 732.885
+CCTV2_ECON30MIN_CMN_20080413_213502 732.885 743.069
+CCTV2_ECON30MIN_CMN_20080413_213502 743.069 756.537
+CCTV2_ECON30MIN_CMN_20080413_213502 756.537 768.318
+CCTV2_ECON30MIN_CMN_20080413_213502 768.318 775.506
+CCTV2_ECON30MIN_CMN_20080413_213502 795.163 806.256
+CCTV2_ECON30MIN_CMN_20080413_213502 806.256 821.639
+CCTV2_ECON30MIN_CMN_20080413_213502 821.639 831.932
+CCTV2_ECON30MIN_CMN_20080413_213502 831.932 837.499
+CCTV2_ECON30MIN_CMN_20080413_213502 854.461 868.133
+CCTV2_ECON30MIN_CMN_20080413_213502 906.759 914.539
+CCTV2_ECON30MIN_CMN_20080413_213502 941.805 949.137
+CCTV2_ECON30MIN_CMN_20080413_213502 959.950 967.309
+CCTV2_ECON30MIN_CMN_20080413_213502 967.309 973.907
+CCTV2_ECON30MIN_CMN_20080413_213502 973.907 981.032
+CCTV2_ECON30MIN_CMN_20080413_213502 981.032 991.667
+CCTV2_ECON30MIN_CMN_20080413_213502 1002.235 1019.177
+CCTV2_ECON30MIN_CMN_20080413_213502 1019.177 1024.302
+CCTV2_ECON30MIN_CMN_20080413_213502 1047.938 1061.093
+CCTV2_ECON30MIN_CMN_20080413_213502 1061.093 1073.890
+CCTV2_ECON30MIN_CMN_20080413_213502 1073.890 1094.129
+CCTV2_ECON30MIN_CMN_20080413_213502 1094.129 1108.023
+CCTV2_ECON30MIN_CMN_20080413_213502 1264.879 1276.133
+CCTV2_ECON30MIN_CMN_20080413_213502 1276.133 1287.357
+CCTV2_ECON30MIN_CMN_20080413_213502 1287.357 1301.355
+CCTV2_ECON30MIN_CMN_20080413_213502 1329.068 1334.870
+CCTV2_ECON30MIN_CMN_20080413_213502 1361.145 1373.848
+CCTV2_ECON30MIN_CMN_20080413_213502 1390.971 1397.675
+CCTV2_ECON30MIN_CMN_20080413_213502 1397.675 1415.110
+CCTV2_ECON30MIN_CMN_20080413_213502 1415.110 1424.236
+CCTV2_ECON30MIN_CMN_20080413_213502 1424.236 1438.064
+CCTV2_ECON30MIN_CMN_20080413_213502 1456.827 1464.000
+CCTV2_ECON30MIN_CMN_20080413_213502 1464.000 1486.858
+CCTV2_ECON30MIN_CMN_20080413_213502 1486.858 1499.305
+CCTV2_ECON30MIN_CMN_20080413_213502 1499.305 1515.107
+CCTV2_ECON30MIN_CMN_20080413_213502 1515.107 1520.123
+CCTV2_ECON30MIN_CMN_20080413_213502 1520.123 1530.536
+CCTV2_ECON30MIN_CMN_20080413_213502 1530.536 1540.050
+CCTV2_ECON30MIN_CMN_20080413_213502 1540.050 1545.190
+CCTV2_ECON30MIN_CMN_20080413_213502 1566.258 1575.446
+CCTV2_ECON30MIN_CMN_20080413_213502 1575.446 1581.649
+CCTV2_ECON30MIN_CMN_20080413_213502 1581.649 1587.055
+CCTV2_ECON30MIN_CMN_20080413_213502 1587.055 1596.101
+CCTV2_ECON30MIN_CMN_20080413_213502 1596.101 1609.832
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 41.494 55.430
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 63.633 70.334
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 70.334 82.217
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 85.069 86.918
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 86.918 104.433
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 104.433 122.361
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 122.361 139.830
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 139.830 161.504
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 182.247 190.984
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 214.840 233.859
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 233.859 252.835
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 252.835 271.994
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 271.994 280.401
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 280.401 294.922
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 294.922 309.979
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 309.979 321.865
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 321.865 333.689
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 360.506 369.039
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 386.794 410.411
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 410.411 423.219
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 423.219 431.124
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 431.124 444.651
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 444.651 454.196
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 454.196 476.136
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 476.136 484.636
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 484.636 498.139
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 498.139 512.507
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 512.507 520.292
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 520.292 541.753
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 541.753 554.984
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 577.799 583.837
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 583.837 592.310
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 631.139 636.376
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 636.376 648.503
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 648.503 665.774
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 683.499 693.733
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 710.185 723.930
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 741.368 754.566
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 754.566 770.758
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 770.758 788.656
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 832.151 840.624
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 867.918 877.762
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 877.762 894.080
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 894.080 909.343
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 909.343 918.295
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 918.295 927.038
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 927.038 938.559
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 938.559 949.571
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 997.048 1006.323
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1006.323 1016.137
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1016.137 1023.473
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1048.644 1059.284
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1059.284 1062.182
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1062.182 1079.547
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1079.547 1093.676
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1093.676 1112.846
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1112.846 1128.445
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1128.445 1140.062
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1140.062 1142.520
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1224.588 1240.750
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1240.750 1254.433
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1254.433 1261.433
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1307.295 1314.985
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1336.393 1351.139
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1398.387 1404.010
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1398.387 1404.010
+CCTV1_30MINNEWS_CMN_20080412_115901 80.926 83.940
+CCTV1_30MINNEWS_CMN_20080412_115901 83.940 86.956
+CCTV1_30MINNEWS_CMN_20080412_115901 86.956 95.722
+CCTV1_30MINNEWS_CMN_20080412_115901 95.722 102.331
+CCTV1_30MINNEWS_CMN_20080412_115901 102.331 104.050
+CCTV1_30MINNEWS_CMN_20080412_115901 104.050 104.738
+CCTV1_30MINNEWS_CMN_20080412_115901 108.866 109.679
+CCTV1_30MINNEWS_CMN_20080412_115901 109.679 112.710
+CCTV1_30MINNEWS_CMN_20080412_115901 112.710 119.007
+CCTV1_30MINNEWS_CMN_20080412_115901 126.272 129.179
+CCTV1_30MINNEWS_CMN_20080412_115901 129.179 133.695
+CCTV1_30MINNEWS_CMN_20080412_115901 133.695 145.680
+CCTV1_30MINNEWS_CMN_20080412_115901 163.445 174.538
+CCTV1_30MINNEWS_CMN_20080412_115901 174.538 184.553
+CCTV1_30MINNEWS_CMN_20080412_115901 184.553 197.568
+CCTV1_30MINNEWS_CMN_20080412_115901 197.568 205.833
+CCTV1_30MINNEWS_CMN_20080412_115901 205.833 213.754
+CCTV1_30MINNEWS_CMN_20080412_115901 213.754 218.723
+CCTV1_30MINNEWS_CMN_20080412_115901 218.723 225.676
+CCTV1_30MINNEWS_CMN_20080412_115901 225.676 233.880
+CCTV1_30MINNEWS_CMN_20080412_115901 233.880 253.645
+CCTV1_30MINNEWS_CMN_20080412_115901 253.645 263.879
+CCTV1_30MINNEWS_CMN_20080412_115901 263.879 271.129
+CCTV1_30MINNEWS_CMN_20080412_115901 271.129 282.957
+CCTV1_30MINNEWS_CMN_20080412_115901 319.911 330.457
+CCTV1_30MINNEWS_CMN_20080412_115901 330.457 338.597
+CCTV1_30MINNEWS_CMN_20080412_115901 345.644 349.644
+CCTV1_30MINNEWS_CMN_20080412_115901 349.644 360.925
+CCTV1_30MINNEWS_CMN_20080412_115901 360.925 367.784
+CCTV1_30MINNEWS_CMN_20080412_115901 367.784 371.175
+CCTV1_30MINNEWS_CMN_20080412_115901 371.175 378.488
+CCTV1_30MINNEWS_CMN_20080412_115901 378.488 386.160
+CCTV1_30MINNEWS_CMN_20080412_115901 386.160 397.328
+CCTV1_30MINNEWS_CMN_20080412_115901 397.328 401.468
+CCTV1_30MINNEWS_CMN_20080412_115901 401.468 408.671
+CCTV1_30MINNEWS_CMN_20080412_115901 408.671 424.587
+CCTV1_30MINNEWS_CMN_20080412_115901 431.477 441.383
+CCTV1_30MINNEWS_CMN_20080412_115901 441.383 446.402
+CCTV1_30MINNEWS_CMN_20080412_115901 446.402 458.857
+CCTV1_30MINNEWS_CMN_20080412_115901 458.857 472.053
+CCTV1_30MINNEWS_CMN_20080412_115901 481.927 496.896
+CCTV1_30MINNEWS_CMN_20080412_115901 506.767 513.564
+CCTV1_30MINNEWS_CMN_20080412_115901 513.564 523.861
+CCTV1_30MINNEWS_CMN_20080412_115901 571.548 580.423
+CCTV1_30MINNEWS_CMN_20080412_115901 580.423 605.876
+CCTV1_30MINNEWS_CMN_20080412_115901 615.611 627.142
+CCTV1_30MINNEWS_CMN_20080412_115901 627.142 635.502
+CCTV1_30MINNEWS_CMN_20080412_115901 649.205 658.799
+CCTV1_30MINNEWS_CMN_20080412_115901 658.799 676.893
+CCTV1_30MINNEWS_CMN_20080412_115901 685.096 702.752
+CCTV1_30MINNEWS_CMN_20080412_115901 702.752 715.221
+CCTV1_30MINNEWS_CMN_20080412_115901 715.221 727.174
+CCTV1_30MINNEWS_CMN_20080412_115901 745.080 772.361
+CCTV1_30MINNEWS_CMN_20080412_115901 791.486 795.361
+CCTV1_30MINNEWS_CMN_20080412_115901 833.033 851.267
+CCTV1_30MINNEWS_CMN_20080412_115901 851.267 875.314
+CCTV1_30MINNEWS_CMN_20080412_115901 875.314 886.939
+CCTV1_30MINNEWS_CMN_20080412_115901 912.139 917.780
+CCTV1_30MINNEWS_CMN_20080412_115901 917.780 929.780
+CCTV1_30MINNEWS_CMN_20080412_115901 933.608 949.881
+CCTV1_30MINNEWS_CMN_20080412_115901 958.408 965.627
+CCTV1_30MINNEWS_CMN_20080412_115901 965.627 976.409
+CCTV1_30MINNEWS_CMN_20080412_115901 976.409 984.925
+CCTV1_30MINNEWS_CMN_20080412_115901 984.925 995.519
+CCTV1_30MINNEWS_CMN_20080412_115901 1000.409 1005.034
+CCTV1_30MINNEWS_CMN_20080412_115901 1005.034 1018.659
+CCTV1_30MINNEWS_CMN_20080412_115901 1018.659 1030.800
+CCTV1_30MINNEWS_CMN_20080412_115901 1045.160 1051.316
+CCTV1_30MINNEWS_CMN_20080412_115901 1051.316 1059.441
+CCTV1_30MINNEWS_CMN_20080412_115901 1089.312 1101.710
+CCTV1_30MINNEWS_CMN_20080412_115901 1101.710 1112.179
+CCTV1_30MINNEWS_CMN_20080412_115901 1112.179 1118.024
+CCTV1_30MINNEWS_CMN_20080412_115901 1124.539 1147.571
+CCTV1_30MINNEWS_CMN_20080412_115901 1147.571 1158.104
+CCTV1_30MINNEWS_CMN_20080412_115901 1158.104 1171.181
+CCTV1_30MINNEWS_CMN_20080412_115901 1171.181 1180.818
+CCTV1_30MINNEWS_CMN_20080412_115901 1193.083 1197.263
+CCTV1_30MINNEWS_CMN_20080412_115901 1197.263 1204.529
+CCTV1_30MINNEWS_CMN_20080412_115901 1204.529 1218.545
+CCTV1_30MINNEWS_CMN_20080412_115901 1218.545 1231.561
+CCTV1_30MINNEWS_CMN_20080412_115901 1252.429 1260.523
+CCTV1_30MINNEWS_CMN_20080412_115901 1272.023 1275.383
+CCTV1_30MINNEWS_CMN_20080412_115901 1298.226 1305.149
+CCTV1_30MINNEWS_CMN_20080412_115901 1305.149 1316.836
+CCTV1_30MINNEWS_CMN_20080412_115901 1323.367 1332.101
+CCTV1_30MINNEWS_CMN_20080412_115901 1332.101 1342.210
+CCTV1_30MINNEWS_CMN_20080412_115901 1342.210 1349.007
+CCTV1_30MINNEWS_CMN_20080412_115901 1366.695 1382.445
+CCTV1_30MINNEWS_CMN_20080412_115901 1540.370 1546.854
+CCTV1_30MINNEWS_CMN_20080412_115901 1546.854 1550.791
+CCTV1_30MINNEWS_CMN_20080412_115901 1550.791 1561.557
+CCTV1_30MINNEWS_CMN_20080412_115901 1568.588 1571.698
+CCTV1_30MINNEWS_CMN_20080412_115901 1571.698 1580.170
+CCTV1_30MINNEWS_CMN_20080412_115901 1580.170 1588.310
+CCTV1_30MINNEWS_CMN_20080412_115901 1588.310 1600.342
+CCTV1_30MINNEWS_CMN_20080412_115901 1600.342 1610.046
+CCTV1_30MINNEWS_CMN_20080412_115901 1610.046 1613.703
+CCTV1_30MINNEWS_CMN_20080412_115901 1619.016 1623.672
+CCTV1_30MINNEWS_CMN_20080412_115901 1623.672 1629.328
+CCTV1_30MINNEWS_CMN_20080412_115901 1629.328 1637.673
+CCTV1_30MINNEWS_CMN_20080412_115901 1637.673 1648.391
+CCTV1_30MINNEWS_CMN_20080412_115901 1854.095 1856.252
+CCTV1_30MINNEWS_CMN_20080412_115901 1856.252 1857.267
+CCTV1_30MINNEWS_CMN_20080412_115901 1857.267 1857.783
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 0.000 6.209
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 6.209 14.137
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 14.137 25.409
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 56.230 67.279
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 67.279 75.786
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 75.786 91.538
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 91.538 97.838
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 107.527 112.089
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 112.089 117.851
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 177.413 187.973
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 187.973 197.117
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 197.117 206.431
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 216.552 222.820
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 232.880 244.537
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 244.537 257.031
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 257.031 271.126
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 271.126 277.080
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 277.080 284.303
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 284.303 292.725
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 292.725 299.345
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 299.345 307.728
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 307.728 316.964
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 330.867 339.701
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 339.701 348.106
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 383.836 394.883
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 394.883 403.519
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 403.519 411.191
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 417.562 422.476
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 422.476 428.471
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 428.471 434.140
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 439.906 446.165
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 460.084 471.696
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 471.696 483.546
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 489.277 492.839
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 492.839 496.932
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 496.932 499.181
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 506.400 510.542
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 510.542 522.075
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 532.889 543.500
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 553.763 561.616
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 561.616 565.194
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 565.194 576.406
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 576.406 582.604
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 582.604 585.370
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 585.370 590.525
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 590.525 594.384
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 594.384 600.650
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 600.650 605.541
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 605.541 612.902
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 612.902 627.800
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 648.252 660.430
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 660.430 666.944
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 666.944 680.285
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 680.285 685.957
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 685.957 690.644
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 701.901 707.461
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 707.461 716.447
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 716.447 730.499
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 730.499 738.106
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 738.106 752.569
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 754.147 764.707
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 764.707 783.005
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 783.005 790.243
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 795.550 799.011
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 799.011 806.711
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 806.711 815.031
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 815.031 818.687
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 818.687 826.091
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 826.091 836.154
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 847.478 854.207
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 865.210 870.939
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 870.939 881.520
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 881.520 894.535
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 929.264 935.125
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 935.125 942.689
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 942.689 948.783
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 948.783 954.407
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 954.407 962.304
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 962.304 971.676
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 971.676 986.820
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 986.820 994.328
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 994.328 1001.110
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1001.110 1007.420
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1007.420 1015.874
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1015.874 1022.275
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1022.275 1028.525
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1028.525 1032.088
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1032.088 1037.024
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1037.024 1047.630
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1052.618 1059.477
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1059.477 1063.976
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1063.976 1070.377
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1077.798 1086.407
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1100.922 1114.396
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1114.396 1125.215
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1125.215 1129.683
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1129.683 1131.987
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1136.777 1140.322
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1140.322 1152.498
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1162.401 1170.247
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1170.247 1189.349
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1189.349 1196.433
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1196.433 1205.715
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1205.715 1212.479
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1212.479 1223.854
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1223.854 1230.425
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1230.425 1237.377
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1237.377 1244.424
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1244.424 1254.561
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1254.561 1265.790
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1265.790 1270.665
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1277.494 1280.134
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1289.368 1292.259
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1292.259 1298.733
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1298.733 1302.608
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1302.608 1316.293
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1316.293 1326.167
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1333.263 1338.404
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1338.404 1346.823
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1352.621 1356.664
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1356.664 1363.276
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1363.276 1372.490
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1372.490 1379.177
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1379.177 1383.066
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1383.066 1387.386
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1387.386 1397.544
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1404.185 1414.703
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1414.703 1422.074
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1422.074 1431.980
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1431.980 1441.341
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1441.341 1447.137
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1447.137 1459.038
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1459.038 1475.083
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1475.083 1483.076
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1493.375 1497.437
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1514.216 1524.013
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1524.013 1527.857
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1527.857 1539.279
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1565.558 1577.942
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1622.428 1632.069
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1632.069 1638.460
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1638.460 1650.273
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1650.273 1655.414
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1667.326 1674.123
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1674.123 1679.279
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1679.279 1688.012
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1688.012 1698.197
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1719.824 1726.402
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1726.402 1737.182
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1737.182 1748.083
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1748.083 1756.557
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1756.557 1766.777
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1775.551 1788.988
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1860.292 1864.465
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 229.180 237.415
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 243.947 254.619
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 264.244 276.026
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 288.369 294.651
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 308.307 313.432
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 313.432 320.619
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 320.619 329.103
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 332.337 344.759
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 344.759 354.103
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 364.118 375.884
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 375.884 385.353
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 406.227 413.322
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 413.322 424.729
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 424.729 433.920
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 452.153 460.502
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 460.502 466.299
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 466.299 469.565
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 469.565 474.753
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 474.753 488.612
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 488.612 498.597
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 498.597 504.144
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 504.144 508.097
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 525.810 533.163
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 533.163 537.476
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 537.476 546.600
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 546.600 553.570
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 560.402 568.074
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 580.003 582.206
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 590.860 595.842
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 595.842 609.405
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 609.405 620.888
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 620.888 632.544
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 632.544 642.998
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 642.998 653.451
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 653.451 667.795
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 687.561 695.092
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 695.092 699.733
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 699.733 715.342
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 730.780 744.987
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 744.987 755.440
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 796.736 800.174
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 800.174 817.909
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 817.909 824.753
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 824.753 836.908
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 836.908 846.471
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 846.471 861.268
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 861.268 874.314
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 884.834 888.397
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 888.397 895.982
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 895.982 899.138
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 899.138 903.748
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 903.748 913.935
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 913.935 919.154
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 919.154 922.607
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 922.607 929.652
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 929.652 934.232
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 934.232 943.607
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 943.607 950.935
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 950.935 957.185
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 957.185 973.558
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 973.558 977.152
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 977.152 980.980
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 985.996 989.731
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 989.731 998.090
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 998.090 1007.763
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1007.763 1009.638
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1009.638 1021.888
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1021.888 1033.873
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1033.873 1045.576
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1045.576 1050.295
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1086.728 1096.728
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1096.728 1103.462
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1103.462 1109.239
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1109.239 1118.910
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1118.910 1129.004
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1129.004 1142.035
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1142.035 1155.051
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1168.342 1171.998
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1171.998 1182.890
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1182.890 1191.624
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1191.624 1195.624
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1195.624 1199.534
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1199.534 1209.394
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1212.972 1218.441
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1218.441 1235.176
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1235.176 1241.364
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1248.768 1255.424
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1255.424 1266.361
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1266.361 1276.876
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1276.876 1292.220
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1292.220 1307.377
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1322.478 1328.166
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1328.166 1337.447
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1337.447 1345.728
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1345.728 1351.729
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1351.729 1358.275
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1379.307 1385.697
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1385.697 1390.916
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1390.916 1398.745
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1398.745 1412.479
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1412.479 1425.304
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1425.304 1440.275
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1448.205 1452.111
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1452.111 1457.689
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1457.689 1461.111
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1461.111 1476.548
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1476.548 1485.204
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1491.967 1499.498
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1499.498 1506.389
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1506.389 1520.295
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1520.295 1527.420
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1547.699 1551.184
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1551.184 1561.871
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1561.871 1568.934
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1568.934 1572.355
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1575.449 1579.027
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1579.027 1587.698
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1587.698 1604.995
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1604.995 1613.573
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1613.573 1621.761
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1621.761 1632.432
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1632.432 1641.151
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1641.151 1646.135
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1646.135 1658.603
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1673.385 1681.588
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1681.588 1686.761
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1686.761 1703.323
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1753.838 1762.369
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1786.430 1800.117
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1809.304 1821.382
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1830.975 1835.415
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1844.977 1853.587
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1853.587 1863.275
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1863.275 1871.963
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1883.900 1904.103
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1904.103 1914.353
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1914.353 1922.291
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1922.291 1935.119
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1935.119 1948.479
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1948.479 1950.854
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1967.191 1969.659
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1969.659 1972.737
+CCTV2_ECON30MIN_CMN_20080423_213501 257.111 259.656
+CCTV2_ECON30MIN_CMN_20080423_213501 270.987 293.998
+CCTV2_ECON30MIN_CMN_20080423_213501 293.998 295.295
+CCTV2_ECON30MIN_CMN_20080423_213501 295.295 314.875
+CCTV2_ECON30MIN_CMN_20080423_213501 314.875 329.497
+CCTV2_ECON30MIN_CMN_20080423_213501 329.497 338.712
+CCTV2_ECON30MIN_CMN_20080423_213501 356.732 361.929
+CCTV2_ECON30MIN_CMN_20080423_213501 426.477 440.252
+CCTV2_ECON30MIN_CMN_20080423_213501 440.252 456.741
+CCTV2_ECON30MIN_CMN_20080423_213501 456.741 469.049
+CCTV2_ECON30MIN_CMN_20080423_213501 469.049 475.314
+CCTV2_ECON30MIN_CMN_20080423_213501 508.324 525.129
+CCTV2_ECON30MIN_CMN_20080423_213501 525.129 537.675
+CCTV2_ECON30MIN_CMN_20080423_213501 553.289 558.181
+CCTV2_ECON30MIN_CMN_20080423_213501 572.881 587.284
+CCTV2_ECON30MIN_CMN_20080423_213501 817.822 832.679
+CCTV2_ECON30MIN_CMN_20080423_213501 832.679 841.440
+CCTV2_ECON30MIN_CMN_20080423_213501 841.440 847.785
+CCTV2_ECON30MIN_CMN_20080423_213501 890.420 901.191
+CCTV2_ECON30MIN_CMN_20080423_213501 901.191 912.491
+CCTV2_ECON30MIN_CMN_20080423_213501 912.491 918.909
+CCTV2_ECON30MIN_CMN_20080423_213501 918.909 930.183
+CCTV2_ECON30MIN_CMN_20080423_213501 930.183 937.214
+CCTV2_ECON30MIN_CMN_20080423_213501 937.214 953.527
+CCTV2_ECON30MIN_CMN_20080423_213501 968.529 986.358
+CCTV2_ECON30MIN_CMN_20080423_213501 986.358 994.568
+CCTV2_ECON30MIN_CMN_20080423_213501 1028.755 1038.023
+CCTV2_ECON30MIN_CMN_20080423_213501 1045.818 1057.145
+CCTV2_ECON30MIN_CMN_20080423_213501 1057.145 1078.709
+CCTV2_ECON30MIN_CMN_20080423_213501 1078.709 1097.516
+CCTV2_ECON30MIN_CMN_20080423_213501 1114.016 1121.094
+CCTV2_ECON30MIN_CMN_20080423_213501 1324.918 1340.824
+CCTV2_ECON30MIN_CMN_20080423_213501 1340.824 1353.066
+CCTV2_ECON30MIN_CMN_20080423_213501 1353.066 1359.097
+CCTV2_ECON30MIN_CMN_20080423_213501 1391.998 1399.365
+CCTV2_ECON30MIN_CMN_20080423_213501 1399.365 1406.887
+CCTV2_ECON30MIN_CMN_20080423_213501 1406.887 1410.143
+CCTV2_ECON30MIN_CMN_20080423_213501 1410.143 1418.096
+CCTV2_ECON30MIN_CMN_20080423_213501 1418.096 1428.893
+CCTV2_ECON30MIN_CMN_20080423_213501 1428.893 1433.531
+CCTV2_ECON30MIN_CMN_20080423_213501 1456.409 1462.994
+CCTV2_ECON30MIN_CMN_20080423_213501 1462.994 1481.916
+CCTV2_ECON30MIN_CMN_20080423_213501 1481.916 1488.323
+CCTV2_ECON30MIN_CMN_20080423_213501 1505.084 1515.523
+CCTV2_ECON30MIN_CMN_20080423_213501 1515.523 1530.069
+CCTV2_ECON30MIN_CMN_20080423_213501 1530.069 1535.456
+CCTV2_ECON30MIN_CMN_20080423_213501 1554.411 1568.680
+CCTV2_ECON30MIN_CMN_20080423_213501 1568.680 1583.993
+CCTV2_ECON30MIN_CMN_20080423_213501 1583.993 1593.065
+CCTV2_ECON30MIN_CMN_20080423_213501 1593.065 1600.186
+CCTV2_ECON30MIN_CMN_20080423_213501 1600.186 1609.109
+CCTV2_ECON30MIN_CMN_20080423_213501 1609.109 1620.586
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 228.670 235.140
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 235.140 241.281
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 241.281 256.851
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 256.851 268.256
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 268.256 271.662
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 271.662 280.334
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 280.334 287.334
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 287.334 302.708
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 342.838 352.322
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 371.353 379.040
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 393.133 404.040
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 404.040 411.118
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 411.118 419.876
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 419.876 427.518
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 443.440 453.533
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 453.533 465.909
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 477.347 493.488
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 493.488 505.346
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 505.346 507.502
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 524.803 532.773
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 532.773 536.740
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 536.740 544.603
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 544.603 552.325
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 574.973 583.386
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 583.386 585.324
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 595.953 601.206
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 610.646 616.245
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 616.245 620.483
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 620.483 625.968
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 632.408 641.799
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 641.799 656.066
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 656.066 665.494
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 665.494 674.462
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 674.462 686.311
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 686.311 702.718
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 702.718 712.045
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 712.045 721.967
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 721.967 732.983
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 732.983 740.842
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 740.842 748.546
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 748.546 758.374
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 768.404 774.435
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 774.435 793.214
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 793.214 802.782
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 802.782 809.888
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 809.888 823.026
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 823.026 839.997
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 844.840 862.114
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 862.114 869.449
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 875.199 877.730
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 902.958 906.661
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 906.661 926.504
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 926.504 932.162
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 956.694 966.225
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 985.527 993.902
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 993.902 997.292
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1001.995 1009.291
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1018.187 1037.327
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1037.327 1045.826
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1056.277 1065.386
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1065.386 1068.602
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1082.352 1086.228
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1086.228 1096.932
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1096.932 1116.244
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1116.244 1121.573
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1121.573 1130.073
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1130.073 1136.079
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1149.807 1153.057
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1153.057 1168.401
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1171.636 1182.964
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1182.964 1192.762
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1206.058 1217.215
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1217.215 1227.840
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1246.206 1260.893
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1260.893 1270.159
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1270.159 1280.331
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1290.518 1301.143
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1319.558 1322.651
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1322.651 1328.870
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1328.870 1336.168
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1336.168 1345.808
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1349.683 1356.731
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1356.731 1374.168
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1374.168 1382.510
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1389.009 1392.665
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1392.665 1402.228
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1409.462 1412.602
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1423.993 1429.801
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1459.942 1475.678
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1475.678 1488.006
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1513.202 1526.216
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1526.216 1539.716
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1568.467 1585.357
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1597.434 1612.745
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1612.745 1621.478
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1632.400 1639.025
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1655.683 1658.557
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1667.837 1675.163
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1691.413 1695.117
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1704.339 1707.356
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1707.356 1716.528
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1716.528 1729.587
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1732.477 1744.962
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1751.024 1757.102
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1768.321 1774.070
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1774.070 1789.398
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1789.398 1794.399
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1806.649 1817.460
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1831.199 1841.760
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1871.079 1883.047
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1897.064 1914.168
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1914.168 1925.184
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1942.262 1945.615
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1951.428 1959.475
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1959.475 1970.481
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1970.481 1975.716
+CCTV2_ECON30MIN_CMN_20080410_213502 225.090 227.481
+CCTV2_ECON30MIN_CMN_20080410_213502 227.481 235.622
+CCTV2_ECON30MIN_CMN_20080410_213502 240.606 249.762
+CCTV2_ECON30MIN_CMN_20080410_213502 257.513 265.842
+CCTV2_ECON30MIN_CMN_20080410_213502 265.842 272.701
+CCTV2_ECON30MIN_CMN_20080410_213502 272.701 277.638
+CCTV2_ECON30MIN_CMN_20080410_213502 277.638 285.185
+CCTV2_ECON30MIN_CMN_20080410_213502 285.185 290.951
+CCTV2_ECON30MIN_CMN_20080410_213502 311.701 317.263
+CCTV2_ECON30MIN_CMN_20080410_213502 317.263 327.200
+CCTV2_ECON30MIN_CMN_20080410_213502 327.200 335.013
+CCTV2_ECON30MIN_CMN_20080410_213502 335.013 340.045
+CCTV2_ECON30MIN_CMN_20080410_213502 340.045 347.374
+CCTV2_ECON30MIN_CMN_20080410_213502 347.374 358.265
+CCTV2_ECON30MIN_CMN_20080410_213502 365.786 369.505
+CCTV2_ECON30MIN_CMN_20080410_213502 369.505 374.177
+CCTV2_ECON30MIN_CMN_20080410_213502 388.365 393.114
+CCTV2_ECON30MIN_CMN_20080410_213502 393.114 401.630
+CCTV2_ECON30MIN_CMN_20080410_213502 401.630 406.193
+CCTV2_ECON30MIN_CMN_20080410_213502 406.193 414.770
+CCTV2_ECON30MIN_CMN_20080410_213502 414.770 426.552
+CCTV2_ECON30MIN_CMN_20080410_213502 426.552 433.099
+CCTV2_ECON30MIN_CMN_20080410_213502 433.099 443.490
+CCTV2_ECON30MIN_CMN_20080410_213502 462.037 477.474
+CCTV2_ECON30MIN_CMN_20080410_213502 492.693 513.350
+CCTV2_ECON30MIN_CMN_20080410_213502 696.780 698.296
+CCTV2_ECON30MIN_CMN_20080410_213502 698.296 710.390
+CCTV2_ECON30MIN_CMN_20080410_213502 710.390 719.952
+CCTV2_ECON30MIN_CMN_20080410_213502 719.952 723.077
+CCTV2_ECON30MIN_CMN_20080410_213502 734.647 742.085
+CCTV2_ECON30MIN_CMN_20080410_213502 752.757 765.305
+CCTV2_ECON30MIN_CMN_20080410_213502 765.305 774.820
+CCTV2_ECON30MIN_CMN_20080410_213502 774.820 788.883
+CCTV2_ECON30MIN_CMN_20080410_213502 803.226 809.507
+CCTV2_ECON30MIN_CMN_20080410_213502 809.507 827.668
+CCTV2_ECON30MIN_CMN_20080410_213502 827.668 838.527
+CCTV2_ECON30MIN_CMN_20080410_213502 838.527 857.199
+CCTV2_ECON30MIN_CMN_20080410_213502 857.199 871.230
+CCTV2_ECON30MIN_CMN_20080410_213502 871.230 886.668
+CCTV2_ECON30MIN_CMN_20080410_213502 886.668 896.292
+CCTV2_ECON30MIN_CMN_20080410_213502 896.292 908.886
+CCTV2_ECON30MIN_CMN_20080410_213502 913.808 926.136
+CCTV2_ECON30MIN_CMN_20080410_213502 930.855 932.043
+CCTV2_ECON30MIN_CMN_20080410_213502 932.043 936.637
+CCTV2_ECON30MIN_CMN_20080410_213502 936.637 945.698
+CCTV2_ECON30MIN_CMN_20080410_213502 957.105 966.277
+CCTV2_ECON30MIN_CMN_20080410_213502 988.324 993.496
+CCTV2_ECON30MIN_CMN_20080410_213502 993.496 1001.808
+CCTV2_ECON30MIN_CMN_20080410_213502 1001.808 1005.512
+CCTV2_ECON30MIN_CMN_20080410_213502 1005.512 1018.247
+CCTV2_ECON30MIN_CMN_20080410_213502 1018.247 1034.600
+CCTV2_ECON30MIN_CMN_20080410_213502 1034.600 1035.960
+CCTV2_ECON30MIN_CMN_20080410_213502 1035.960 1037.491
+CCTV2_ECON30MIN_CMN_20080410_213502 1037.491 1038.444
+CCTV2_ECON30MIN_CMN_20080410_213502 1038.444 1045.928
+CCTV2_ECON30MIN_CMN_20080410_213502 1045.928 1055.772
+CCTV2_ECON30MIN_CMN_20080410_213502 1055.772 1059.084
+CCTV2_ECON30MIN_CMN_20080410_213502 1059.084 1063.834
+CCTV2_ECON30MIN_CMN_20080410_213502 1063.834 1074.537
+CCTV2_ECON30MIN_CMN_20080410_213502 1074.537 1088.068
+CCTV2_ECON30MIN_CMN_20080410_213502 1088.068 1093.545
+CCTV2_ECON30MIN_CMN_20080410_213502 1220.024 1221.727
+CCTV2_ECON30MIN_CMN_20080410_213502 1221.727 1231.145
+CCTV2_ECON30MIN_CMN_20080410_213502 1239.350 1241.569
+CCTV2_ECON30MIN_CMN_20080410_213502 1241.569 1253.335
+CCTV2_ECON30MIN_CMN_20080410_213502 1253.335 1259.333
+CCTV2_ECON30MIN_CMN_20080410_213502 1266.469 1275.360
+CCTV2_ECON30MIN_CMN_20080410_213502 1275.360 1280.281
+CCTV2_ECON30MIN_CMN_20080410_213502 1280.281 1286.203
+CCTV2_ECON30MIN_CMN_20080410_213502 1286.203 1297.750
+CCTV2_ECON30MIN_CMN_20080410_213502 1297.750 1309.984
+CCTV2_ECON30MIN_CMN_20080410_213502 1317.016 1328.313
+CCTV2_ECON30MIN_CMN_20080410_213502 1328.313 1334.734
+CCTV2_ECON30MIN_CMN_20080410_213502 1361.375 1367.984
+CCTV2_ECON30MIN_CMN_20080410_213502 1367.984 1378.327
+CCTV2_ECON30MIN_CMN_20080410_213502 1386.014 1402.983
+CCTV2_ECON30MIN_CMN_20080410_213502 1402.983 1418.014
+CCTV2_ECON30MIN_CMN_20080410_213502 1418.014 1422.592
+CCTV2_ECON30MIN_CMN_20080410_213502 1422.592 1428.530
+CCTV2_ECON30MIN_CMN_20080410_213502 1428.530 1436.826
+CCTV2_ECON30MIN_CMN_20080410_213502 1436.826 1451.029
+CCTV2_ECON30MIN_CMN_20080410_213502 1459.560 1465.950
+CCTV2_ECON30MIN_CMN_20080410_213502 1465.950 1478.440
+CCTV2_ECON30MIN_CMN_20080410_213502 1495.753 1503.222
+CCTV2_ECON30MIN_CMN_20080410_213502 1503.222 1519.346
+CCTV2_ECON30MIN_CMN_20080410_213502 1519.346 1525.048
+CCTV2_ECON30MIN_CMN_20080410_213502 1525.048 1531.063
+CCTV2_ECON30MIN_CMN_20080410_213502 1531.063 1541.047
+CCTV2_ECON30MIN_CMN_20080410_213502 1544.984 1558.626
+CCTV2_ECON30MIN_CMN_20080410_213502 1574.580 1581.252
+CCTV2_ECON30MIN_CMN_20080410_213502 1581.252 1598.721
+CCTV2_ECON30MIN_CMN_20080410_213502 1598.721 1601.393
+CCTV2_ECON30MIN_CMN_20080410_213502 1601.393 1616.362
+CCTV2_ECON30MIN_CMN_20080410_213502 1616.362 1620.664
+VOA_INTNLNEWS_CMN_20080414_210000 82.073 91.340
+VOA_INTNLNEWS_CMN_20080414_210000 91.340 104.121
+VOA_INTNLNEWS_CMN_20080414_210000 104.121 117.911
+VOA_INTNLNEWS_CMN_20080414_210000 117.911 128.005
+VOA_INTNLNEWS_CMN_20080414_210000 141.247 146.906
+VOA_INTNLNEWS_CMN_20080414_210000 241.538 249.738
+VOA_INTNLNEWS_CMN_20080414_210000 251.238 258.783
+VOA_INTNLNEWS_CMN_20080414_210000 258.783 268.638
+VOA_INTNLNEWS_CMN_20080414_210000 284.448 289.745
+VOA_INTNLNEWS_CMN_20080414_210000 289.745 292.152
+VOA_INTNLNEWS_CMN_20080414_210000 292.152 303.074
+VOA_INTNLNEWS_CMN_20080414_210000 311.325 314.435
+VOA_INTNLNEWS_CMN_20080414_210000 314.435 322.060
+VOA_INTNLNEWS_CMN_20080414_210000 322.060 329.721
+VOA_INTNLNEWS_CMN_20080414_210000 329.721 340.456
+VOA_INTNLNEWS_CMN_20080414_210000 340.456 349.638
+VOA_INTNLNEWS_CMN_20080414_210000 349.638 356.842
+VOA_INTNLNEWS_CMN_20080414_210000 356.842 364.437
+VOA_INTNLNEWS_CMN_20080414_210000 372.703 380.018
+VOA_INTNLNEWS_CMN_20080414_210000 395.253 401.985
+VOA_INTNLNEWS_CMN_20080414_210000 401.985 411.500
+VOA_INTNLNEWS_CMN_20080414_210000 422.194 429.763
+VOA_INTNLNEWS_CMN_20080414_210000 452.385 458.816
+VOA_INTNLNEWS_CMN_20080414_210000 458.816 465.895
+VOA_INTNLNEWS_CMN_20080414_210000 465.895 472.326
+VOA_INTNLNEWS_CMN_20080414_210000 472.326 481.544
+VOA_INTNLNEWS_CMN_20080414_210000 481.544 486.544
+VOA_INTNLNEWS_CMN_20080414_210000 486.544 497.197
+VOA_INTNLNEWS_CMN_20080414_210000 507.341 515.887
+VOA_INTNLNEWS_CMN_20080414_210000 515.887 520.481
+VOA_INTNLNEWS_CMN_20080414_210000 527.916 532.573
+VOA_INTNLNEWS_CMN_20080414_210000 532.573 536.504
+VOA_INTNLNEWS_CMN_20080414_210000 536.504 549.290
+VOA_INTNLNEWS_CMN_20080414_210000 549.290 558.026
+VOA_INTNLNEWS_CMN_20080414_210000 558.026 562.568
+VOA_INTNLNEWS_CMN_20080414_210000 562.568 566.506
+VOA_INTNLNEWS_CMN_20080414_210000 578.866 584.798
+VOA_INTNLNEWS_CMN_20080414_210000 590.454 593.891
+VOA_INTNLNEWS_CMN_20080414_210000 593.891 603.734
+VOA_INTNLNEWS_CMN_20080414_210000 620.629 628.771
+VOA_INTNLNEWS_CMN_20080414_210000 628.771 636.857
+VOA_INTNLNEWS_CMN_20080414_210000 644.956 650.628
+VOA_INTNLNEWS_CMN_20080414_210000 658.018 666.769
+VOA_INTNLNEWS_CMN_20080414_210000 666.769 672.722
+VOA_INTNLNEWS_CMN_20080414_210000 685.177 692.764
+VOA_INTNLNEWS_CMN_20080414_210000 692.764 702.571
+VOA_INTNLNEWS_CMN_20080414_210000 708.654 711.794
+VOA_INTNLNEWS_CMN_20080414_210000 711.794 717.701
+VOA_INTNLNEWS_CMN_20080414_210000 717.701 724.168
+VOA_INTNLNEWS_CMN_20080414_210000 724.168 734.327
+VOA_INTNLNEWS_CMN_20080414_210000 734.327 737.387
+VOA_INTNLNEWS_CMN_20080414_210000 737.387 744.859
+VOA_INTNLNEWS_CMN_20080414_210000 744.859 747.671
+VOA_INTNLNEWS_CMN_20080414_210000 747.671 753.470
+VOA_INTNLNEWS_CMN_20080414_210000 764.262 769.788
+VOA_INTNLNEWS_CMN_20080414_210000 769.788 777.624
+VOA_INTNLNEWS_CMN_20080414_210000 777.624 781.690
+VOA_INTNLNEWS_CMN_20080414_210000 788.307 795.221
+VOA_INTNLNEWS_CMN_20080414_210000 795.221 803.125
+VOA_INTNLNEWS_CMN_20080414_210000 803.125 811.720
+VOA_INTNLNEWS_CMN_20080414_210000 811.720 823.220
+VOA_INTNLNEWS_CMN_20080414_210000 825.376 833.180
+VOA_INTNLNEWS_CMN_20080414_210000 833.180 838.941
+VOA_INTNLNEWS_CMN_20080414_210000 846.844 852.814
+VOA_INTNLNEWS_CMN_20080414_210000 857.455 861.693
+VOA_INTNLNEWS_CMN_20080414_210000 861.693 870.299
+VOA_INTNLNEWS_CMN_20080414_210000 876.556 881.322
+VOA_INTNLNEWS_CMN_20080414_210000 881.322 888.547
+VOA_INTNLNEWS_CMN_20080414_210000 893.971 900.658
+VOA_INTNLNEWS_CMN_20080414_210000 900.658 906.502
+VOA_INTNLNEWS_CMN_20080414_210000 906.502 913.487
+VOA_INTNLNEWS_CMN_20080414_210000 913.487 925.228
+VOA_INTNLNEWS_CMN_20080414_210000 925.228 927.650
+VOA_INTNLNEWS_CMN_20080414_210000 939.893 941.348
+VOA_INTNLNEWS_CMN_20080414_210000 941.348 947.683
+VOA_INTNLNEWS_CMN_20080414_210000 947.683 955.816
+VOA_INTNLNEWS_CMN_20080414_210000 955.816 960.000
+CCTV2_ECON30MIN_CMN_20080412_213501 230.003 232.570
+CCTV2_ECON30MIN_CMN_20080412_213501 241.633 249.273
+CCTV2_ECON30MIN_CMN_20080412_213501 249.273 258.554
+CCTV2_ECON30MIN_CMN_20080412_213501 310.601 315.507
+CCTV2_ECON30MIN_CMN_20080412_213501 328.208 333.644
+CCTV2_ECON30MIN_CMN_20080412_213501 333.644 338.456
+CCTV2_ECON30MIN_CMN_20080412_213501 338.456 349.418
+CCTV2_ECON30MIN_CMN_20080412_213501 366.356 377.754
+CCTV2_ECON30MIN_CMN_20080412_213501 377.754 394.637
+CCTV2_ECON30MIN_CMN_20080412_213501 394.637 397.372
+CCTV2_ECON30MIN_CMN_20080412_213501 397.372 414.310
+CCTV2_ECON30MIN_CMN_20080412_213501 414.310 431.075
+CCTV2_ECON30MIN_CMN_20080412_213501 460.897 471.147
+CCTV2_ECON30MIN_CMN_20080412_213501 481.710 485.970
+CCTV2_ECON30MIN_CMN_20080412_213501 491.599 494.084
+CCTV2_ECON30MIN_CMN_20080412_213501 507.515 518.140
+CCTV2_ECON30MIN_CMN_20080412_213501 523.233 538.499
+CCTV2_ECON30MIN_CMN_20080412_213501 538.499 546.655
+CCTV2_ECON30MIN_CMN_20080412_213501 546.655 555.311
+CCTV2_ECON30MIN_CMN_20080412_213501 562.240 570.115
+CCTV2_ECON30MIN_CMN_20080412_213501 591.646 595.943
+CCTV2_ECON30MIN_CMN_20080412_213501 790.444 791.757
+CCTV2_ECON30MIN_CMN_20080412_213501 791.757 799.741
+CCTV2_ECON30MIN_CMN_20080412_213501 799.741 814.960
+CCTV2_ECON30MIN_CMN_20080412_213501 814.960 823.022
+CCTV2_ECON30MIN_CMN_20080412_213501 823.022 825.616
+CCTV2_ECON30MIN_CMN_20080412_213501 825.616 835.084
+CCTV2_ECON30MIN_CMN_20080412_213501 841.258 842.164
+CCTV2_ECON30MIN_CMN_20080412_213501 861.010 869.166
+CCTV2_ECON30MIN_CMN_20080412_213501 869.166 876.838
+CCTV2_ECON30MIN_CMN_20080412_213501 876.838 889.775
+CCTV2_ECON30MIN_CMN_20080412_213501 948.904 956.123
+CCTV2_ECON30MIN_CMN_20080412_213501 993.843 1001.155
+CCTV2_ECON30MIN_CMN_20080412_213501 1001.155 1015.952
+CCTV2_ECON30MIN_CMN_20080412_213501 1051.327 1056.874
+CCTV2_ECON30MIN_CMN_20080412_213501 1056.874 1061.968
+CCTV2_ECON30MIN_CMN_20080412_213501 1061.968 1072.672
+CCTV2_ECON30MIN_CMN_20080412_213501 1072.672 1077.703
+CCTV2_ECON30MIN_CMN_20080412_213501 1077.703 1087.063
+CCTV2_ECON30MIN_CMN_20080412_213501 1087.063 1099.782
+CCTV2_ECON30MIN_CMN_20080412_213501 1115.376 1118.423
+CCTV2_ECON30MIN_CMN_20080412_213501 1118.423 1125.585
+CCTV2_ECON30MIN_CMN_20080412_213501 1125.585 1130.366
+CCTV2_ECON30MIN_CMN_20080412_213501 1149.351 1158.899
+CCTV2_ECON30MIN_CMN_20080412_213501 1158.899 1174.383
+CCTV2_ECON30MIN_CMN_20080412_213501 1174.383 1189.883
+CCTV2_ECON30MIN_CMN_20080412_213501 1189.883 1199.320
+CCTV2_ECON30MIN_CMN_20080412_213501 1208.457 1211.988
+CCTV2_ECON30MIN_CMN_20080412_213501 1211.988 1222.581
+CCTV2_ECON30MIN_CMN_20080412_213501 1389.942 1391.270
+CCTV2_ECON30MIN_CMN_20080412_213501 1391.270 1405.864
+CCTV2_ECON30MIN_CMN_20080412_213501 1405.864 1426.352
+CCTV2_ECON30MIN_CMN_20080412_213501 1426.352 1430.414
+CCTV2_ECON30MIN_CMN_20080412_213501 1430.414 1442.632
+CCTV2_ECON30MIN_CMN_20080412_213501 1442.632 1451.585
+CCTV2_ECON30MIN_CMN_20080412_213501 1451.585 1460.022
+CCTV2_ECON30MIN_CMN_20080412_213501 1460.022 1473.334
+CCTV2_ECON30MIN_CMN_20080412_213501 1473.334 1484.584
+CCTV2_ECON30MIN_CMN_20080412_213501 1484.584 1488.506
+CCTV2_ECON30MIN_CMN_20080412_213501 1488.506 1501.132
+CCTV2_ECON30MIN_CMN_20080412_213501 1501.132 1508.052
+CCTV2_ECON30MIN_CMN_20080412_213501 1516.572 1520.182
+CCTV2_ECON30MIN_CMN_20080412_213501 1520.182 1530.340
+CCTV2_ECON30MIN_CMN_20080412_213501 1530.340 1544.714
+CCTV2_ECON30MIN_CMN_20080412_213501 1544.714 1552.714
+CCTV2_ECON30MIN_CMN_20080412_213501 1561.079 1576.573
+CCTV2_ECON30MIN_CMN_20080412_213501 1576.573 1590.170
+CCTV2_ECON30MIN_CMN_20080412_213501 1598.543 1611.565
+CCTV2_ECON30MIN_CMN_20080412_213501 1611.565 1616.752
+CCTV2_ECON30MIN_CMN_20080412_213501 1616.752 1620.586
+CCTV1_30MINNEWS_CMN_20080407_115901 72.599 80.144
+CCTV1_30MINNEWS_CMN_20080407_115901 80.144 83.857
+CCTV1_30MINNEWS_CMN_20080407_115901 83.857 105.129
+CCTV1_30MINNEWS_CMN_20080407_115901 116.117 121.357
+CCTV1_30MINNEWS_CMN_20080407_115901 150.030 161.497
+CCTV1_30MINNEWS_CMN_20080407_115901 161.497 175.629
+CCTV1_30MINNEWS_CMN_20080407_115901 213.663 230.995
+CCTV1_30MINNEWS_CMN_20080407_115901 230.995 249.179
+CCTV1_30MINNEWS_CMN_20080407_115901 291.761 304.379
+CCTV1_30MINNEWS_CMN_20080407_115901 304.379 319.932
+CCTV1_30MINNEWS_CMN_20080407_115901 319.932 333.454
+CCTV1_30MINNEWS_CMN_20080407_115901 333.454 352.479
+CCTV1_30MINNEWS_CMN_20080407_115901 370.503 377.776
+CCTV1_30MINNEWS_CMN_20080407_115901 416.755 433.159
+CCTV1_30MINNEWS_CMN_20080407_115901 451.599 461.390
+CCTV1_30MINNEWS_CMN_20080407_115901 461.390 474.803
+CCTV1_30MINNEWS_CMN_20080407_115901 474.803 484.535
+CCTV1_30MINNEWS_CMN_20080407_115901 484.535 506.059
+CCTV1_30MINNEWS_CMN_20080407_115901 506.059 526.838
+CCTV1_30MINNEWS_CMN_20080407_115901 526.838 549.979
+CCTV1_30MINNEWS_CMN_20080407_115901 549.979 564.409
+CCTV1_30MINNEWS_CMN_20080407_115901 564.409 575.066
+CCTV1_30MINNEWS_CMN_20080407_115901 575.066 589.108
+CCTV1_30MINNEWS_CMN_20080407_115901 589.108 596.139
+CCTV1_30MINNEWS_CMN_20080407_115901 596.139 629.826
+CCTV1_30MINNEWS_CMN_20080407_115901 629.826 640.245
+CCTV1_30MINNEWS_CMN_20080407_115901 640.245 661.855
+CCTV1_30MINNEWS_CMN_20080407_115901 661.855 679.314
+CCTV1_30MINNEWS_CMN_20080407_115901 682.941 700.337
+CCTV1_30MINNEWS_CMN_20080407_115901 737.969 766.080
+CCTV1_30MINNEWS_CMN_20080407_115901 945.161 963.631
+CCTV1_30MINNEWS_CMN_20080407_115901 963.631 978.990
+CCTV1_30MINNEWS_CMN_20080407_115901 978.990 990.241
+CCTV1_30MINNEWS_CMN_20080407_115901 990.241 1013.970
+CCTV1_30MINNEWS_CMN_20080407_115901 1052.265 1064.960
+CCTV1_30MINNEWS_CMN_20080407_115901 1095.197 1111.780
+CCTV1_30MINNEWS_CMN_20080407_115901 1153.333 1174.020
+CCTV1_30MINNEWS_CMN_20080407_115901 1197.400 1213.209
+CCTV1_30MINNEWS_CMN_20080407_115901 1213.209 1226.592
+CCTV1_30MINNEWS_CMN_20080407_115901 1296.113 1312.308
+CCTV1_30MINNEWS_CMN_20080407_115901 1312.308 1324.524
+CCTV1_30MINNEWS_CMN_20080407_115901 1324.524 1333.815
+CCTV1_30MINNEWS_CMN_20080407_115901 1333.815 1345.312
+CCTV1_30MINNEWS_CMN_20080407_115901 1345.312 1362.742
+CCTV1_30MINNEWS_CMN_20080407_115901 1362.742 1378.300
+CCTV1_30MINNEWS_CMN_20080407_115901 1412.752 1430.177
+CCTV1_30MINNEWS_CMN_20080407_115901 1430.177 1438.558
+CCTV1_30MINNEWS_CMN_20080407_115901 1438.558 1453.235
+CCTV1_30MINNEWS_CMN_20080407_115901 1479.408 1492.582
+CCTV1_30MINNEWS_CMN_20080407_115901 1492.582 1501.444
+CCTV1_30MINNEWS_CMN_20080407_115901 1501.444 1516.022
+CCTV1_30MINNEWS_CMN_20080407_115901 1541.756 1561.663
+CCTV1_30MINNEWS_CMN_20080407_115901 1561.663 1576.635
+CCTV1_30MINNEWS_CMN_20080407_115901 1576.635 1589.839
+CCTV1_30MINNEWS_CMN_20080407_115901 1589.839 1606.458
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 211.536 215.364
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 215.364 218.114
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 218.114 237.536
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 248.239 257.426
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 257.426 269.504
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 273.606 279.903
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 285.113 296.675
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 309.998 318.529
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 318.529 327.607
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 327.607 337.982
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 337.982 350.607
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 350.607 357.904
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 357.904 368.388
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 368.388 373.263
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 373.263 380.278
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 380.278 392.778
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 392.778 404.935
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 404.935 410.279
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 424.810 441.669
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 463.857 470.404
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 492.464 498.496
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 498.496 504.325
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 504.325 510.313
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 510.313 523.688
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 534.907 545.517
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 545.517 556.939
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 566.173 574.610
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 574.610 582.485
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 582.485 589.923
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 589.923 604.111
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 604.111 609.908
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 609.908 626.459
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 647.974 662.880
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 666.207 679.000
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 679.000 685.766
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 685.766 694.974
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 694.974 706.099
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 712.365 718.364
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 718.364 729.161
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 729.161 733.521
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 740.478 752.133
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 752.133 761.336
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 761.336 764.571
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 764.571 772.056
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 772.056 780.962
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 794.321 802.523
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 802.523 806.929
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 806.929 812.507
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 812.507 823.068
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 823.068 835.519
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 835.519 840.581
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 840.581 852.097
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 852.097 858.505
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 858.505 870.925
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 870.925 872.691
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 886.238 895.894
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 895.894 901.003
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 901.003 911.518
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 911.518 921.706
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 921.706 930.159
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 930.159 952.471
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 952.471 961.782
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 961.782 972.300
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 973.527 979.090
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 979.090 991.168
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1000.308 1011.245
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1048.746 1056.356
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1073.090 1080.730
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1080.730 1095.590
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1095.590 1103.982
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1110.373 1116.202
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1116.202 1128.130
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1128.130 1140.724
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1140.724 1149.976
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1149.976 1154.414
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1154.414 1165.240
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1183.615 1191.177
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1197.880 1204.599
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1204.599 1211.990
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1211.990 1224.131
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1224.131 1226.459
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1226.459 1233.224
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1233.224 1240.630
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1245.504 1249.956
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1249.956 1257.472
+VOA_INTNLNEWS_CMN_20080410_210000 241.096 247.022
+VOA_INTNLNEWS_CMN_20080410_210000 247.022 248.349
+VOA_INTNLNEWS_CMN_20080410_210000 261.004 271.226
+VOA_INTNLNEWS_CMN_20080410_210000 271.226 281.664
+VOA_INTNLNEWS_CMN_20080410_210000 289.743 295.118
+VOA_INTNLNEWS_CMN_20080410_210000 295.118 297.196
+VOA_INTNLNEWS_CMN_20080410_210000 297.196 303.774
+VOA_INTNLNEWS_CMN_20080410_210000 303.774 309.774
+VOA_INTNLNEWS_CMN_20080410_210000 325.212 335.493
+VOA_INTNLNEWS_CMN_20080410_210000 335.493 341.461
+VOA_INTNLNEWS_CMN_20080410_210000 341.461 347.883
+VOA_INTNLNEWS_CMN_20080410_210000 347.883 356.164
+VOA_INTNLNEWS_CMN_20080410_210000 356.164 360.430
+VOA_INTNLNEWS_CMN_20080410_210000 360.430 369.056
+VOA_INTNLNEWS_CMN_20080410_210000 369.056 381.087
+VOA_INTNLNEWS_CMN_20080410_210000 381.087 386.040
+VOA_INTNLNEWS_CMN_20080410_210000 386.040 389.509
+VOA_INTNLNEWS_CMN_20080410_210000 389.509 397.385
+VOA_INTNLNEWS_CMN_20080410_210000 397.385 401.682
+VOA_INTNLNEWS_CMN_20080410_210000 401.682 408.708
+VOA_INTNLNEWS_CMN_20080410_210000 408.708 414.473
+VOA_INTNLNEWS_CMN_20080410_210000 414.473 418.441
+VOA_INTNLNEWS_CMN_20080410_210000 425.457 435.818
+VOA_INTNLNEWS_CMN_20080410_210000 435.818 442.693
+VOA_INTNLNEWS_CMN_20080410_210000 442.693 449.365
+VOA_INTNLNEWS_CMN_20080410_210000 449.365 456.240
+VOA_INTNLNEWS_CMN_20080410_210000 456.240 459.788
+VOA_INTNLNEWS_CMN_20080410_210000 459.788 464.226
+VOA_INTNLNEWS_CMN_20080410_210000 470.648 475.663
+VOA_INTNLNEWS_CMN_20080410_210000 475.663 482.178
+VOA_INTNLNEWS_CMN_20080410_210000 482.178 492.412
+VOA_INTNLNEWS_CMN_20080410_210000 492.412 497.100
+VOA_INTNLNEWS_CMN_20080410_210000 497.100 500.990
+VOA_INTNLNEWS_CMN_20080410_210000 500.990 503.850
+VOA_INTNLNEWS_CMN_20080410_210000 503.850 515.757
+VOA_INTNLNEWS_CMN_20080410_210000 515.757 525.945
+VOA_INTNLNEWS_CMN_20080410_210000 525.945 537.039
+VOA_INTNLNEWS_CMN_20080410_210000 537.039 545.070
+VOA_INTNLNEWS_CMN_20080410_210000 545.070 549.976
+VOA_INTNLNEWS_CMN_20080410_210000 557.008 562.032
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 228.180 236.133
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 236.133 244.843
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 255.810 259.701
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 259.701 268.261
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 278.715 292.809
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 312.904 317.155
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 334.781 341.536
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 341.536 351.799
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 351.799 360.435
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 360.435 366.748
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 376.030 389.032
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 389.032 399.781
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 399.781 408.548
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 408.548 414.331
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 414.331 422.518
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 422.518 426.022
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 426.022 432.701
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 432.701 440.420
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 440.420 453.372
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 461.998 470.404
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 470.404 475.420
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 475.420 484.810
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 484.810 490.248
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 490.248 500.982
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 500.982 508.403
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 521.637 527.780
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 527.780 532.997
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 532.997 544.717
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 544.717 551.822
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 551.822 553.995
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 560.145 564.427
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 564.427 575.878
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 575.878 580.002
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 580.002 586.055
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 586.055 592.181
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 592.181 595.181
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 595.181 598.946
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 598.946 613.235
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 621.049 634.460
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 634.460 642.522
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 642.522 649.698
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 649.698 657.807
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 657.807 662.156
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 662.156 668.308
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 668.308 681.335
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 695.461 701.961
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 701.961 712.410
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 712.410 718.802
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 718.802 725.024
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 744.309 750.042
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 750.042 761.379
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 770.348 786.365
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 791.958 799.987
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 799.987 805.999
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 815.813 824.181
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 824.181 839.867
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 839.867 845.243
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 845.243 856.615
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 867.381 870.021
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 870.021 872.920
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 879.896 892.784
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 906.782 910.734
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 910.734 924.841
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 924.841 935.031
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 935.031 957.200
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 969.511 978.637
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 978.637 983.601
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 983.601 993.537
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 993.537 1011.337
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1011.337 1026.450
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1026.450 1030.840
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1030.840 1050.755
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1050.755 1063.656
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1063.656 1072.571
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1072.571 1078.711
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1078.711 1086.983
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1086.983 1100.777
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1100.777 1114.079
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1114.079 1126.798
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1163.086 1172.149
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1186.789 1188.726
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1192.203 1196.211
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1196.211 1207.461
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1207.461 1214.070
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1226.879 1229.215
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1236.640 1242.171
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1242.171 1249.543
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1257.477 1264.526
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1264.526 1268.292
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1283.046 1287.238
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1287.238 1294.534
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1294.534 1305.347
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1305.347 1316.003
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1344.598 1350.644
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1375.571 1379.886
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1379.886 1389.964
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1389.964 1397.933
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1414.520 1422.927
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1429.053 1435.522
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1435.522 1440.163
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1440.163 1451.210
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1451.210 1457.773
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1457.773 1465.993
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1470.852 1475.711
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1483.711 1490.899
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1490.899 1497.868
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1514.887 1522.070
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1526.976 1537.617
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1537.617 1543.257
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1543.257 1549.522
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1569.299 1573.908
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1587.114 1596.333
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1596.333 1601.880
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1601.880 1608.978
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1608.978 1614.837
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1622.775 1625.540
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1634.554 1643.945
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1651.340 1666.658
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1666.658 1676.939
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1682.510 1691.625
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1691.625 1704.187
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1727.775 1740.884
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1740.884 1754.195
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1754.195 1758.586
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1766.540 1788.461
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1788.461 1797.320
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1832.570 1845.677
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1852.192 1861.661
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1861.661 1865.881
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1877.756 1890.644
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1905.985 1917.337
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1917.337 1933.369
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1953.813 1959.610
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1959.610 1962.781
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1962.781 1974.406
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1974.406 1980.155
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1980.155 1987.740
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2011.483 2013.780
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2013.780 2022.562
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2033.662 2037.700
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2037.700 2046.344
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2057.264 2071.271
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2090.929 2095.844
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2113.417 2120.572
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2120.572 2130.836
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2130.836 2139.166
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2139.166 2146.237
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 0.000 6.406
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 6.406 25.687
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 25.687 42.264
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 42.264 50.139
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 50.139 57.030
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 57.030 64.796
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 64.796 69.874
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 69.874 78.733
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 78.733 87.607
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 87.607 97.373
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 97.373 102.279
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 108.826 116.358
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 119.811 132.706
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 132.706 159.347
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 159.347 169.519
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 193.519 198.629
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 198.629 209.509
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 236.962 246.151
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 252.495 259.980
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 259.980 277.386
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 277.386 291.652
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 291.652 295.544
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 295.544 302.231
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 302.231 323.173
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 332.360 337.564
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 337.564 340.952
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 347.515 356.637
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 367.262 373.388
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 402.855 411.900
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 411.900 420.685
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 420.685 428.325
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 428.325 434.171
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 438.031 446.500
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 446.500 455.751
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 455.751 464.548
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 471.355 486.134
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 486.134 497.166
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 497.166 506.574
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 506.574 514.181
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 514.181 520.135
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 520.135 533.384
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 533.384 540.275
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 540.275 547.757
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 547.757 552.976
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 552.976 558.383
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 558.383 567.271
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 567.271 575.239
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 581.411 584.629
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 584.629 591.848
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 596.348 601.958
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 601.958 617.017
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 617.017 633.601
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 640.751 645.814
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 645.814 659.437
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 659.437 663.542
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 663.542 668.322
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 668.322 679.681
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 679.681 690.271
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 690.271 703.386
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 703.386 713.979
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 713.979 721.657
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 721.657 732.046
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 732.046 743.477
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 768.381 774.067
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 774.067 782.489
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 782.489 788.896
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 788.896 797.457
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 805.003 816.628
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 816.628 823.926
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 823.926 832.112
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 832.112 833.815
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 839.424 852.986
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 867.282 876.986
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 876.986 892.158
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 905.111 913.809
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 913.809 916.558
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 916.558 919.496
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 919.496 927.948
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 927.948 937.875
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 937.875 955.487
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 955.487 971.142
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 979.812 988.934
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 988.934 994.982
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 994.982 1003.881
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1003.881 1009.440
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1009.440 1023.520
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1023.520 1025.488
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1025.488 1034.377
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1041.127 1045.674
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1045.674 1054.331
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1054.331 1063.112
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1063.112 1066.378
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1066.378 1076.675
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1081.284 1085.769
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1095.286 1100.661
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1100.661 1104.720
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1104.720 1113.782
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1113.782 1123.170
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1123.170 1128.733
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1128.733 1146.732
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1146.732 1151.919
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1151.919 1155.950
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1166.418 1171.902
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1171.902 1181.512
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1181.512 1187.964
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1187.964 1190.324
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1190.324 1191.746
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1191.746 1192.434
+CCTV2_ECON30MIN_CMN_20080425_213502 25.719 33.376
+CCTV2_ECON30MIN_CMN_20080425_213502 36.746 45.495
+CCTV2_ECON30MIN_CMN_20080425_213502 50.808 54.652
+CCTV2_ECON30MIN_CMN_20080425_213502 216.596 218.956
+CCTV2_ECON30MIN_CMN_20080425_213502 218.956 239.191
+CCTV2_ECON30MIN_CMN_20080425_213502 239.191 250.566
+CCTV2_ECON30MIN_CMN_20080425_213502 272.537 286.157
+CCTV2_ECON30MIN_CMN_20080425_213502 286.157 292.595
+CCTV2_ECON30MIN_CMN_20080425_213502 292.595 299.455
+CCTV2_ECON30MIN_CMN_20080425_213502 299.455 313.924
+CCTV2_ECON30MIN_CMN_20080425_213502 313.924 319.379
+CCTV2_ECON30MIN_CMN_20080425_213502 319.379 321.925
+CCTV2_ECON30MIN_CMN_20080425_213502 319.379 321.925
+CCTV2_ECON30MIN_CMN_20080425_213502 321.925 327.956
+CCTV2_ECON30MIN_CMN_20080425_213502 327.956 335.146
+CCTV2_ECON30MIN_CMN_20080425_213502 335.146 339.349
+CCTV2_ECON30MIN_CMN_20080425_213502 339.349 357.333
+CCTV2_ECON30MIN_CMN_20080425_213502 357.333 364.505
+CCTV2_ECON30MIN_CMN_20080425_213502 364.505 367.790
+CCTV2_ECON30MIN_CMN_20080425_213502 367.790 372.478
+CCTV2_ECON30MIN_CMN_20080425_213502 372.478 383.152
+CCTV2_ECON30MIN_CMN_20080425_213502 383.152 392.501
+CCTV2_ECON30MIN_CMN_20080425_213502 392.501 394.235
+CCTV2_ECON30MIN_CMN_20080425_213502 394.235 401.264
+CCTV2_ECON30MIN_CMN_20080425_213502 401.264 415.155
+CCTV2_ECON30MIN_CMN_20080425_213502 429.875 440.328
+CCTV2_ECON30MIN_CMN_20080425_213502 440.328 450.703
+CCTV2_ECON30MIN_CMN_20080425_213502 450.703 463.235
+CCTV2_ECON30MIN_CMN_20080425_213502 469.252 480.892
+CCTV2_ECON30MIN_CMN_20080425_213502 480.892 489.777
+CCTV2_ECON30MIN_CMN_20080425_213502 489.777 498.264
+CCTV2_ECON30MIN_CMN_20080425_213502 501.889 506.483
+CCTV2_ECON30MIN_CMN_20080425_213502 506.483 517.202
+CCTV2_ECON30MIN_CMN_20080425_213502 517.202 528.155
+CCTV2_ECON30MIN_CMN_20080425_213502 534.310 544.232
+CCTV2_ECON30MIN_CMN_20080425_213502 544.232 551.194
+CCTV2_ECON30MIN_CMN_20080425_213502 551.194 555.475
+CCTV2_ECON30MIN_CMN_20080425_213502 555.475 569.742
+CCTV2_ECON30MIN_CMN_20080425_213502 569.742 577.632
+CCTV2_ECON30MIN_CMN_20080425_213502 577.632 587.850
+CCTV2_ECON30MIN_CMN_20080425_213502 587.850 599.917
+CCTV2_ECON30MIN_CMN_20080425_213502 613.042 618.542
+CCTV2_ECON30MIN_CMN_20080425_213502 618.542 631.917
+CCTV2_ECON30MIN_CMN_20080425_213502 631.917 644.870
+CCTV2_ECON30MIN_CMN_20080425_213502 644.870 676.477
+CCTV2_ECON30MIN_CMN_20080425_213502 676.477 689.633
+CCTV2_ECON30MIN_CMN_20080425_213502 689.633 699.791
+CCTV2_ECON30MIN_CMN_20080425_213502 851.668 852.996
+CCTV2_ECON30MIN_CMN_20080425_213502 852.996 876.511
+CCTV2_ECON30MIN_CMN_20080425_213502 876.511 881.604
+CCTV2_ECON30MIN_CMN_20080425_213502 881.604 903.275
+CCTV2_ECON30MIN_CMN_20080425_213502 903.275 904.447
+CCTV2_ECON30MIN_CMN_20080425_213502 903.275 904.447
+CCTV2_ECON30MIN_CMN_20080425_213502 904.447 906.790
+CCTV2_ECON30MIN_CMN_20080425_213502 906.790 911.665
+CCTV2_ECON30MIN_CMN_20080425_213502 914.868 923.118
+CCTV2_ECON30MIN_CMN_20080425_213502 923.118 929.744
+CCTV2_ECON30MIN_CMN_20080425_213502 929.744 942.631
+CCTV2_ECON30MIN_CMN_20080425_213502 942.631 946.850
+CCTV2_ECON30MIN_CMN_20080425_213502 946.850 949.709
+CCTV2_ECON30MIN_CMN_20080425_213502 949.709 950.224
+CCTV2_ECON30MIN_CMN_20080425_213502 961.945 975.632
+CCTV2_ECON30MIN_CMN_20080425_213502 975.632 980.478
+CCTV2_ECON30MIN_CMN_20080425_213502 980.478 982.056
+CCTV2_ECON30MIN_CMN_20080425_213502 982.056 989.393
+CCTV2_ECON30MIN_CMN_20080425_213502 989.393 1002.122
+CCTV2_ECON30MIN_CMN_20080425_213502 1002.122 1003.029
+CCTV2_ECON30MIN_CMN_20080425_213502 1002.122 1003.029
+CCTV2_ECON30MIN_CMN_20080425_213502 1003.029 1008.045
+CCTV2_ECON30MIN_CMN_20080425_213502 1008.045 1019.029
+CCTV2_ECON30MIN_CMN_20080425_213502 1026.061 1033.936
+CCTV2_ECON30MIN_CMN_20080425_213502 1033.936 1046.405
+CCTV2_ECON30MIN_CMN_20080425_213502 1046.405 1061.850
+CCTV2_ECON30MIN_CMN_20080425_213502 1061.850 1070.678
+CCTV2_ECON30MIN_CMN_20080425_213502 1070.678 1077.507
+CCTV2_ECON30MIN_CMN_20080425_213502 1077.507 1084.179
+CCTV2_ECON30MIN_CMN_20080425_213502 1084.179 1093.492
+CCTV2_ECON30MIN_CMN_20080425_213502 1111.087 1126.793
+CCTV2_ECON30MIN_CMN_20080425_213502 1135.543 1142.245
+CCTV2_ECON30MIN_CMN_20080425_213502 1142.245 1147.538
+CCTV2_ECON30MIN_CMN_20080425_213502 1147.538 1149.476
+CCTV2_ECON30MIN_CMN_20080425_213502 1160.007 1163.570
+CCTV2_ECON30MIN_CMN_20080425_213502 1163.570 1171.992
+CCTV2_ECON30MIN_CMN_20080425_213502 1183.305 1189.123
+CCTV2_ECON30MIN_CMN_20080425_213502 1189.123 1193.841
+CCTV2_ECON30MIN_CMN_20080425_213502 1320.679 1321.835
+CCTV2_ECON30MIN_CMN_20080425_213502 1321.835 1339.758
+CCTV2_ECON30MIN_CMN_20080425_213502 1339.758 1345.945
+CCTV2_ECON30MIN_CMN_20080425_213502 1345.945 1359.070
+CCTV2_ECON30MIN_CMN_20080425_213502 1359.070 1366.007
+CCTV2_ECON30MIN_CMN_20080425_213502 1366.007 1376.663
+CCTV2_ECON30MIN_CMN_20080425_213502 1376.663 1384.632
+CCTV2_ECON30MIN_CMN_20080425_213502 1384.632 1397.725
+CCTV2_ECON30MIN_CMN_20080425_213502 1397.725 1400.023
+CCTV2_ECON30MIN_CMN_20080425_213502 1400.023 1403.734
+CCTV2_ECON30MIN_CMN_20080425_213502 1403.734 1405.265
+CCTV2_ECON30MIN_CMN_20080425_213502 1405.265 1409.281
+CCTV2_ECON30MIN_CMN_20080425_213502 1409.281 1417.828
+CCTV2_ECON30MIN_CMN_20080425_213502 1417.828 1431.453
+CCTV2_ECON30MIN_CMN_20080425_213502 1431.453 1445.022
+CCTV2_ECON30MIN_CMN_20080425_213502 1445.022 1451.636
+CCTV2_ECON30MIN_CMN_20080425_213502 1451.636 1463.714
+CCTV2_ECON30MIN_CMN_20080425_213502 1463.714 1479.306
+CCTV2_ECON30MIN_CMN_20080425_213502 1486.303 1505.256
+CCTV2_ECON30MIN_CMN_20080425_213502 1505.256 1517.693
+CCTV2_ECON30MIN_CMN_20080425_213502 1517.693 1519.912
+CCTV2_ECON30MIN_CMN_20080425_213502 1526.475 1529.428
+CCTV2_ECON30MIN_CMN_20080425_213502 1529.428 1530.366
+CCTV2_ECON30MIN_CMN_20080425_213502 1530.366 1533.631
+CCTV2_ECON30MIN_CMN_20080425_213502 1533.631 1537.912
+CCTV2_ECON30MIN_CMN_20080425_213502 1551.522 1567.381
+CCTV2_ECON30MIN_CMN_20080425_213502 1572.397 1587.053
+CCTV2_ECON30MIN_CMN_20080425_213502 1587.053 1594.929
+CCTV2_ECON30MIN_CMN_20080425_213502 1594.929 1604.288
+CCTV2_ECON30MIN_CMN_20080425_213502 1604.288 1608.975
+CCTV2_ECON30MIN_CMN_20080425_213502 1608.975 1614.209
+CCTV2_ECON30MIN_CMN_20080425_213502 1614.209 1620.429
+VOA_INTNLNEWS_CMN_20080412_210000 241.515 247.437
+VOA_INTNLNEWS_CMN_20080412_210000 247.437 251.249
+VOA_INTNLNEWS_CMN_20080412_210000 251.249 255.157
+VOA_INTNLNEWS_CMN_20080412_210000 255.157 256.985
+VOA_INTNLNEWS_CMN_20080412_210000 256.985 266.042
+VOA_INTNLNEWS_CMN_20080412_210000 266.042 272.027
+VOA_INTNLNEWS_CMN_20080412_210000 285.606 294.059
+VOA_INTNLNEWS_CMN_20080412_210000 327.823 339.136
+VOA_INTNLNEWS_CMN_20080412_210000 339.136 347.120
+VOA_INTNLNEWS_CMN_20080412_210000 347.120 353.417
+VOA_INTNLNEWS_CMN_20080412_210000 371.979 379.588
+VOA_INTNLNEWS_CMN_20080412_210000 379.588 382.417
+VOA_INTNLNEWS_CMN_20080412_210000 382.417 390.261
+VOA_INTNLNEWS_CMN_20080412_210000 390.261 400.886
+VOA_INTNLNEWS_CMN_20080412_210000 400.886 409.511
+VOA_INTNLNEWS_CMN_20080412_210000 409.511 418.933
+VOA_INTNLNEWS_CMN_20080412_210000 418.933 430.136
+VOA_INTNLNEWS_CMN_20080412_210000 430.136 436.839
+VOA_INTNLNEWS_CMN_20080412_210000 445.991 456.382
+VOA_INTNLNEWS_CMN_20080412_210000 456.382 465.273
+VOA_INTNLNEWS_CMN_20080412_210000 469.772 473.928
+VOA_INTNLNEWS_CMN_20080412_210000 479.631 485.787
+VOA_INTNLNEWS_CMN_20080412_210000 485.787 490.584
+VOA_INTNLNEWS_CMN_20080412_210000 490.584 498.194
+VOA_INTNLNEWS_CMN_20080412_210000 498.194 502.054
+VOA_INTNLNEWS_CMN_20080412_210000 502.054 510.008
+VOA_INTNLNEWS_CMN_20080412_210000 510.008 516.523
+VOA_INTNLNEWS_CMN_20080412_210000 516.523 522.148
+VOA_INTNLNEWS_CMN_20080412_210000 522.148 530.038
+VOA_INTNLNEWS_CMN_20080412_210000 530.038 535.836
+VOA_INTNLNEWS_CMN_20080412_210000 535.836 541.070
+VOA_INTNLNEWS_CMN_20080412_210000 541.070 547.726
+VOA_INTNLNEWS_CMN_20080412_210000 547.726 552.539
+VOA_INTNLNEWS_CMN_20080402_210000 255.095 267.832
+VOA_INTNLNEWS_CMN_20080402_210000 267.832 277.345
+VOA_INTNLNEWS_CMN_20080402_210000 277.345 284.313
+VOA_INTNLNEWS_CMN_20080402_210000 284.313 289.595
+VOA_INTNLNEWS_CMN_20080402_210000 289.595 291.392
+VOA_INTNLNEWS_CMN_20080402_210000 291.392 295.550
+VOA_INTNLNEWS_CMN_20080402_210000 303.005 309.490
+VOA_INTNLNEWS_CMN_20080402_210000 314.473 326.161
+VOA_INTNLNEWS_CMN_20080402_210000 326.161 332.957
+VOA_INTNLNEWS_CMN_20080402_210000 332.957 338.723
+VOA_INTNLNEWS_CMN_20080402_210000 338.723 346.136
+VOA_INTNLNEWS_CMN_20080402_210000 362.290 370.555
+VOA_INTNLNEWS_CMN_20080402_210000 370.555 379.896
+VOA_INTNLNEWS_CMN_20080402_210000 386.036 392.129
+VOA_INTNLNEWS_CMN_20080402_210000 392.129 400.301
+VOA_INTNLNEWS_CMN_20080402_210000 400.301 414.724
+VOA_INTNLNEWS_CMN_20080402_210000 414.724 430.958
+VOA_INTNLNEWS_CMN_20080402_210000 430.958 435.676
+VOA_INTNLNEWS_CMN_20080402_210000 435.676 451.927
+VOA_INTNLNEWS_CMN_20080402_210000 451.927 461.149
+VOA_INTNLNEWS_CMN_20080402_210000 461.149 467.856
+VOA_INTNLNEWS_CMN_20080402_210000 467.856 474.842
+VOA_INTNLNEWS_CMN_20080402_210000 474.842 479.671
+VOA_INTNLNEWS_CMN_20080402_210000 479.671 490.540
+VOA_INTNLNEWS_CMN_20080402_210000 490.540 499.612
+VOA_INTNLNEWS_CMN_20080402_210000 499.612 509.706
+VOA_INTNLNEWS_CMN_20080402_210000 509.706 512.798
+VOA_INTNLNEWS_CMN_20080402_210000 512.798 523.002
+VOA_INTNLNEWS_CMN_20080402_210000 523.002 530.768
+VOA_INTNLNEWS_CMN_20080402_210000 530.768 534.705
+VOA_INTNLNEWS_CMN_20080402_210000 541.379 545.271
+VOA_INTNLNEWS_CMN_20080402_210000 545.271 553.535
+VOA_INTNLNEWS_CMN_20080402_210000 553.535 561.894
+VOA_INTNLNEWS_CMN_20080402_210000 561.894 564.723
+VOA_INTNLNEWS_CMN_20080402_210000 589.946 597.929
+VOA_INTNLNEWS_CMN_20080402_210000 597.929 607.307
+VOA_INTNLNEWS_CMN_20080402_210000 611.169 617.137
+VOA_INTNLNEWS_CMN_20080402_210000 617.137 626.983
+VOA_INTNLNEWS_CMN_20080402_210000 626.983 635.684
+VOA_INTNLNEWS_CMN_20080402_210000 635.684 641.301
+VOA_INTNLNEWS_CMN_20080402_210000 648.878 655.888
+VOA_INTNLNEWS_CMN_20080402_210000 655.888 659.062
+VOA_INTNLNEWS_CMN_20080402_210000 682.139 691.071
+VOA_INTNLNEWS_CMN_20080402_210000 691.071 697.602
+VOA_INTNLNEWS_CMN_20080402_210000 697.602 706.116
+VOA_INTNLNEWS_CMN_20080402_210000 706.116 715.568
+VOA_INTNLNEWS_CMN_20080402_210000 715.568 719.730
+VOA_INTNLNEWS_CMN_20080402_210000 719.730 721.761
+VOA_INTNLNEWS_CMN_20080402_210000 721.761 734.686
+VOA_INTNLNEWS_CMN_20080402_210000 734.686 738.562
+VOA_INTNLNEWS_CMN_20080402_210000 745.020 758.410
+VOA_INTNLNEWS_CMN_20080402_210000 758.410 773.659
+VOA_INTNLNEWS_CMN_20080402_210000 773.659 778.739
+VOA_INTNLNEWS_CMN_20080402_210000 778.739 786.284
+VOA_INTNLNEWS_CMN_20080402_210000 802.384 807.363
+VOA_INTNLNEWS_CMN_20080402_210000 807.363 816.378
+VOA_INTNLNEWS_CMN_20080402_210000 821.767 826.288
+VOA_INTNLNEWS_CMN_20080402_210000 841.318 852.754
+VOA_INTNLNEWS_CMN_20080402_210000 852.754 856.503
+VOA_INTNLNEWS_CMN_20080402_210000 856.503 858.941
+VOA_INTNLNEWS_CMN_20080402_210000 868.034 869.300
+VOA_INTNLNEWS_CMN_20080402_210000 869.300 881.254
+VOA_INTNLNEWS_CMN_20080402_210000 881.254 884.568
+VOA_INTNLNEWS_CMN_20080402_210000 896.069 913.723
+VOA_INTNLNEWS_CMN_20080402_210000 913.723 925.820
+VOA_INTNLNEWS_CMN_20080402_210000 941.707 947.865
+VOA_INTNLNEWS_CMN_20080402_210000 952.085 960.000
+CCTV1_30MINNEWS_CMN_20080329_115901 73.737 77.049
+CCTV1_30MINNEWS_CMN_20080329_115901 77.049 82.711
+CCTV1_30MINNEWS_CMN_20080329_115901 82.711 87.201
+CCTV1_30MINNEWS_CMN_20080329_115901 87.201 95.233
+CCTV1_30MINNEWS_CMN_20080329_115901 98.890 99.733
+CCTV1_30MINNEWS_CMN_20080329_115901 99.733 102.687
+CCTV1_30MINNEWS_CMN_20080329_115901 102.687 110.877
+CCTV1_30MINNEWS_CMN_20080329_115901 110.877 114.392
+CCTV1_30MINNEWS_CMN_20080329_115901 119.220 126.939
+CCTV1_30MINNEWS_CMN_20080329_115901 126.939 133.564
+CCTV1_30MINNEWS_CMN_20080329_115901 133.564 142.751
+CCTV1_30MINNEWS_CMN_20080329_115901 153.986 158.408
+CCTV1_30MINNEWS_CMN_20080329_115901 158.408 172.456
+CCTV1_30MINNEWS_CMN_20080329_115901 182.995 194.276
+CCTV1_30MINNEWS_CMN_20080329_115901 194.276 202.276
+CCTV1_30MINNEWS_CMN_20080329_115901 202.276 215.833
+CCTV1_30MINNEWS_CMN_20080329_115901 228.865 237.145
+CCTV1_30MINNEWS_CMN_20080329_115901 237.145 248.863
+CCTV1_30MINNEWS_CMN_20080329_115901 248.863 259.941
+CCTV1_30MINNEWS_CMN_20080329_115901 259.941 266.238
+CCTV1_30MINNEWS_CMN_20080329_115901 266.238 274.238
+CCTV1_30MINNEWS_CMN_20080329_115901 274.238 287.286
+CCTV1_30MINNEWS_CMN_20080329_115901 287.286 295.099
+CCTV1_30MINNEWS_CMN_20080329_115901 295.099 306.801
+CCTV1_30MINNEWS_CMN_20080329_115901 306.801 326.598
+CCTV1_30MINNEWS_CMN_20080329_115901 341.614 347.379
+CCTV1_30MINNEWS_CMN_20080329_115901 347.379 357.614
+CCTV1_30MINNEWS_CMN_20080329_115901 357.614 363.676
+CCTV1_30MINNEWS_CMN_20080329_115901 363.676 373.630
+CCTV1_30MINNEWS_CMN_20080329_115901 373.630 384.913
+CCTV1_30MINNEWS_CMN_20080329_115901 384.913 391.115
+CCTV1_30MINNEWS_CMN_20080329_115901 399.272 404.884
+CCTV1_30MINNEWS_CMN_20080329_115901 404.884 411.860
+CCTV1_30MINNEWS_CMN_20080329_115901 480.963 493.542
+CCTV1_30MINNEWS_CMN_20080329_115901 493.542 502.400
+CCTV1_30MINNEWS_CMN_20080329_115901 505.334 507.256
+CCTV1_30MINNEWS_CMN_20080329_115901 507.256 511.100
+CCTV1_30MINNEWS_CMN_20080329_115901 511.100 517.459
+CCTV1_30MINNEWS_CMN_20080329_115901 517.459 526.599
+CCTV1_30MINNEWS_CMN_20080329_115901 526.599 537.850
+CCTV1_30MINNEWS_CMN_20080329_115901 537.850 546.377
+CCTV1_30MINNEWS_CMN_20080329_115901 590.276 605.433
+CCTV1_30MINNEWS_CMN_20080329_115901 605.433 616.324
+CCTV1_30MINNEWS_CMN_20080329_115901 616.324 635.762
+CCTV1_30MINNEWS_CMN_20080329_115901 635.762 640.668
+CCTV1_30MINNEWS_CMN_20080329_115901 640.668 646.683
+CCTV1_30MINNEWS_CMN_20080329_115901 646.683 654.121
+CCTV1_30MINNEWS_CMN_20080329_115901 668.982 681.060
+CCTV1_30MINNEWS_CMN_20080329_115901 681.060 690.889
+CCTV1_30MINNEWS_CMN_20080329_115901 690.889 705.174
+CCTV1_30MINNEWS_CMN_20080329_115901 705.174 718.168
+CCTV1_30MINNEWS_CMN_20080329_115901 718.168 735.215
+CCTV1_30MINNEWS_CMN_20080329_115901 735.215 749.948
+CCTV1_30MINNEWS_CMN_20080329_115901 749.948 755.824
+CCTV1_30MINNEWS_CMN_20080329_115901 755.824 763.433
+CCTV1_30MINNEWS_CMN_20080329_115901 763.433 783.448
+CCTV1_30MINNEWS_CMN_20080329_115901 789.386 804.901
+CCTV1_30MINNEWS_CMN_20080329_115901 804.901 815.636
+CCTV1_30MINNEWS_CMN_20080329_115901 822.120 826.994
+CCTV1_30MINNEWS_CMN_20080329_115901 832.682 842.168
+CCTV1_30MINNEWS_CMN_20080329_115901 842.168 855.058
+CCTV1_30MINNEWS_CMN_20080329_115901 855.058 859.479
+CCTV1_30MINNEWS_CMN_20080329_115901 862.713 870.541
+CCTV1_30MINNEWS_CMN_20080329_115901 870.541 874.744
+CCTV1_30MINNEWS_CMN_20080329_115901 874.744 878.042
+CCTV1_30MINNEWS_CMN_20080329_115901 885.433 893.088
+CCTV1_30MINNEWS_CMN_20080329_115901 893.088 899.478
+CCTV1_30MINNEWS_CMN_20080329_115901 899.478 909.024
+CCTV1_30MINNEWS_CMN_20080329_115901 909.024 912.305
+CCTV1_30MINNEWS_CMN_20080329_115901 912.305 921.181
+CCTV1_30MINNEWS_CMN_20080329_115901 932.742 945.490
+CCTV1_30MINNEWS_CMN_20080329_115901 945.490 955.677
+CCTV1_30MINNEWS_CMN_20080329_115901 955.677 961.792
+CCTV1_30MINNEWS_CMN_20080329_115901 961.792 968.605
+CCTV1_30MINNEWS_CMN_20080329_115901 1030.620 1042.119
+CCTV1_30MINNEWS_CMN_20080329_115901 1059.120 1073.629
+CCTV1_30MINNEWS_CMN_20080329_115901 1095.784 1108.955
+CCTV1_30MINNEWS_CMN_20080329_115901 1108.955 1117.456
+CCTV1_30MINNEWS_CMN_20080329_115901 1245.444 1246.740
+CCTV1_30MINNEWS_CMN_20080329_115901 1246.740 1255.490
+CCTV1_30MINNEWS_CMN_20080329_115901 1255.490 1262.537
+CCTV1_30MINNEWS_CMN_20080329_115901 1266.865 1276.943
+CCTV1_30MINNEWS_CMN_20080329_115901 1276.943 1297.152
+CCTV1_30MINNEWS_CMN_20080329_115901 1304.601 1311.062
+CCTV1_30MINNEWS_CMN_20080329_115901 1322.392 1332.458
+CCTV1_30MINNEWS_CMN_20080329_115901 1350.128 1362.927
+CCTV1_30MINNEWS_CMN_20080329_115901 1362.927 1373.530
+CCTV1_30MINNEWS_CMN_20080329_115901 1373.530 1380.390
+CCTV1_30MINNEWS_CMN_20080329_115901 1380.390 1388.563
+CCTV1_30MINNEWS_CMN_20080329_115901 1388.563 1397.892
+CCTV1_30MINNEWS_CMN_20080329_115901 1404.298 1409.143
+CCTV1_30MINNEWS_CMN_20080329_115901 1409.143 1415.690
+CCTV1_30MINNEWS_CMN_20080329_115901 1415.690 1421.940
+CCTV1_30MINNEWS_CMN_20080329_115901 1421.940 1426.878
+CCTV1_30MINNEWS_CMN_20080329_115901 1426.878 1432.096
+CCTV1_30MINNEWS_CMN_20080329_115901 1432.096 1439.315
+CCTV1_30MINNEWS_CMN_20080329_115901 1439.315 1445.503
+CCTV1_30MINNEWS_CMN_20080329_115901 1445.503 1452.112
+CCTV1_30MINNEWS_CMN_20080329_115901 1452.112 1458.815
+CCTV1_30MINNEWS_CMN_20080329_115901 1458.815 1463.893
+CCTV1_30MINNEWS_CMN_20080329_115901 1477.876 1490.923
+CCTV1_30MINNEWS_CMN_20080329_115901 1490.923 1497.470
+CCTV1_30MINNEWS_CMN_20080329_115901 1497.470 1509.517
+CCTV1_30MINNEWS_CMN_20080329_115901 1509.517 1519.860
+CCTV1_30MINNEWS_CMN_20080329_115901 1519.860 1524.232
+CCTV1_30MINNEWS_CMN_20080329_115901 1524.232 1531.575
+CCTV1_30MINNEWS_CMN_20080329_115901 1531.575 1536.919
+CCTV1_30MINNEWS_CMN_20080329_115901 1536.919 1550.184
+CCTV1_30MINNEWS_CMN_20080329_115901 1550.184 1558.341
+CCTV1_30MINNEWS_CMN_20080329_115901 1558.341 1565.857
+CCTV1_30MINNEWS_CMN_20080329_115901 1565.857 1572.576
+CCTV1_30MINNEWS_CMN_20080329_115901 1572.576 1580.029
+CCTV1_30MINNEWS_CMN_20080329_115901 1580.029 1586.342
+CCTV1_30MINNEWS_CMN_20080329_115901 1586.342 1593.825
+CCTV1_30MINNEWS_CMN_20080329_115901 1593.825 1601.136
+CCTV1_30MINNEWS_CMN_20080329_115901 1601.136 1606.121
+CCTV1_30MINNEWS_CMN_20080329_115901 1606.121 1611.402
+CCTV1_30MINNEWS_CMN_20080329_115901 1616.026 1622.962
+CCTV1_30MINNEWS_CMN_20080329_115901 1844.920 1846.827
+CCTV1_30MINNEWS_CMN_20080329_115901 1848.234 1849.093
+CCTV2_NEWSLIST_CMN_20080415_114902 118.872 124.153
+CCTV2_NEWSLIST_CMN_20080415_114902 124.153 132.330
+CCTV2_NEWSLIST_CMN_20080415_114902 151.335 153.291
+CCTV2_NEWSLIST_CMN_20080415_114902 160.886 162.996
+CCTV2_NEWSLIST_CMN_20080415_114902 162.996 166.512
+CCTV2_NEWSLIST_CMN_20080415_114902 169.262 172.317
+CCTV2_NEWSLIST_CMN_20080415_114902 172.317 174.661
+CCTV2_NEWSLIST_CMN_20080415_114902 174.661 177.129
+CCTV2_NEWSLIST_CMN_20080415_114902 177.129 192.098
+CCTV2_NEWSLIST_CMN_20080415_114902 192.098 202.081
+CCTV2_NEWSLIST_CMN_20080415_114902 202.081 212.719
+CCTV2_NEWSLIST_CMN_20080415_114902 212.719 215.719
+CCTV2_NEWSLIST_CMN_20080415_114902 215.719 221.026
+CCTV2_NEWSLIST_CMN_20080415_114902 221.026 227.468
+CCTV2_NEWSLIST_CMN_20080415_114902 227.468 232.186
+CCTV2_NEWSLIST_CMN_20080415_114902 232.186 237.685
+CCTV2_NEWSLIST_CMN_20080415_114902 237.685 248.085
+CCTV2_NEWSLIST_CMN_20080415_114902 248.085 253.039
+CCTV2_NEWSLIST_CMN_20080415_114902 253.039 256.648
+CCTV2_NEWSLIST_CMN_20080415_114902 256.648 262.523
+CCTV2_NEWSLIST_CMN_20080415_114902 262.523 270.695
+CCTV2_NEWSLIST_CMN_20080415_114902 270.695 279.708
+CCTV2_NEWSLIST_CMN_20080415_114902 279.708 284.803
+CCTV2_NEWSLIST_CMN_20080415_114902 284.803 289.584
+CCTV2_NEWSLIST_CMN_20080415_114902 289.584 303.614
+CCTV2_NEWSLIST_CMN_20080415_114902 303.614 308.741
+CCTV2_NEWSLIST_CMN_20080415_114902 308.741 315.460
+CCTV2_NEWSLIST_CMN_20080415_114902 315.460 331.672
+CCTV2_NEWSLIST_CMN_20080415_114902 331.672 348.261
+CCTV2_NEWSLIST_CMN_20080415_114902 348.261 354.870
+CCTV2_NEWSLIST_CMN_20080415_114902 354.870 370.665
+CCTV2_NEWSLIST_CMN_20080415_114902 370.665 382.167
+CCTV2_NEWSLIST_CMN_20080415_114902 382.167 398.167
+CCTV2_NEWSLIST_CMN_20080415_114902 409.360 415.515
+CCTV2_NEWSLIST_CMN_20080415_114902 415.515 418.773
+CCTV2_NEWSLIST_CMN_20080415_114902 424.625 429.204
+CCTV2_NEWSLIST_CMN_20080415_114902 429.204 433.545
+CCTV2_NEWSLIST_CMN_20080415_114902 433.545 440.273
+CCTV2_NEWSLIST_CMN_20080415_114902 440.273 453.168
+CCTV2_NEWSLIST_CMN_20080415_114902 459.961 469.841
+CCTV2_NEWSLIST_CMN_20080415_114902 469.841 471.216
+CCTV2_NEWSLIST_CMN_20080415_114902 471.216 477.701
+CCTV2_NEWSLIST_CMN_20080415_114902 477.701 485.001
+CCTV2_NEWSLIST_CMN_20080415_114902 485.001 491.797
+CCTV2_NEWSLIST_CMN_20080415_114902 491.797 503.670
+CCTV2_NEWSLIST_CMN_20080415_114902 503.670 508.640
+CCTV2_NEWSLIST_CMN_20080415_114902 516.808 529.315
+CCTV2_NEWSLIST_CMN_20080415_114902 529.315 537.036
+CCTV2_NEWSLIST_CMN_20080415_114902 537.036 548.101
+CCTV2_NEWSLIST_CMN_20080415_114902 548.101 552.509
+CCTV2_NEWSLIST_CMN_20080415_114902 552.509 560.945
+CCTV2_NEWSLIST_CMN_20080415_114902 593.353 601.347
+CCTV2_NEWSLIST_CMN_20080415_114902 601.347 604.683
+CCTV2_NEWSLIST_CMN_20080415_114902 604.683 607.324
+CCTV2_NEWSLIST_CMN_20080415_114902 607.324 618.086
+CCTV2_NEWSLIST_CMN_20080415_114902 618.086 622.098
+CCTV2_NEWSLIST_CMN_20080415_114902 622.098 627.645
+CCTV2_NEWSLIST_CMN_20080415_114902 627.645 632.925
+CCTV2_NEWSLIST_CMN_20080415_114902 632.925 638.422
+CCTV2_NEWSLIST_CMN_20080415_114902 638.422 647.797
+CCTV2_NEWSLIST_CMN_20080415_114902 647.797 659.030
+CCTV2_NEWSLIST_CMN_20080415_114902 659.030 664.373
+CCTV2_NEWSLIST_CMN_20080415_114902 664.373 668.529
+CCTV2_NEWSLIST_CMN_20080415_114902 668.529 675.404
+CCTV2_NEWSLIST_CMN_20080415_114902 675.404 679.185
+CCTV2_NEWSLIST_CMN_20080415_114902 679.185 685.107
+CCTV2_NEWSLIST_CMN_20080415_114902 685.107 691.372
+CCTV2_NEWSLIST_CMN_20080415_114902 691.372 695.168
+CCTV2_NEWSLIST_CMN_20080415_114902 695.168 705.028
+CCTV2_NEWSLIST_CMN_20080415_114902 705.028 713.882
+CCTV2_NEWSLIST_CMN_20080415_114902 736.008 749.039
+CCTV2_NEWSLIST_CMN_20080415_114902 771.900 778.431
+CCTV2_NEWSLIST_CMN_20080415_114902 778.431 784.603
+CCTV2_NEWSLIST_CMN_20080415_114902 784.603 794.727
+CCTV2_NEWSLIST_CMN_20080415_114902 794.727 801.443
+CCTV2_NEWSLIST_CMN_20080415_114902 801.443 808.679
+CCTV2_NEWSLIST_CMN_20080415_114902 808.679 814.381
+CCTV2_NEWSLIST_CMN_20080415_114902 848.253 860.213
+CCTV2_NEWSLIST_CMN_20080415_114902 860.213 870.113
+CCTV2_NEWSLIST_CMN_20080415_114902 870.113 873.941
+CCTV2_NEWSLIST_CMN_20080415_114902 873.941 878.660
+CCTV2_NEWSLIST_CMN_20080415_114902 878.660 886.067
+CCTV2_NEWSLIST_CMN_20080415_114902 886.067 891.098
+CCTV2_NEWSLIST_CMN_20080415_114902 899.375 906.766
+CCTV2_NEWSLIST_CMN_20080415_114902 927.686 931.091
+CCTV2_NEWSLIST_CMN_20080415_114902 931.091 944.248
+CCTV2_NEWSLIST_CMN_20080415_114902 944.248 958.248
+CCTV2_NEWSLIST_CMN_20080415_114902 958.248 981.526
+CCTV2_NEWSLIST_CMN_20080415_114902 981.526 990.776
+CCTV2_NEWSLIST_CMN_20080415_114902 990.776 1001.721
+CCTV2_NEWSLIST_CMN_20080415_114902 1010.787 1018.240
+CCTV2_NEWSLIST_CMN_20080415_114902 1018.240 1025.350
+CCTV2_NEWSLIST_CMN_20080415_114902 1025.350 1034.053
+CCTV2_NEWSLIST_CMN_20080415_114902 1058.628 1067.316
+CCTV2_NEWSLIST_CMN_20080415_114902 1067.316 1078.332
+CCTV2_NEWSLIST_CMN_20080415_114902 1078.332 1088.216
+CCTV2_NEWSLIST_CMN_20080415_114902 1088.216 1097.465
+CCTV2_NEWSLIST_CMN_20080415_114902 1097.465 1108.577
+CCTV2_NEWSLIST_CMN_20080415_114902 1108.577 1117.375
+CCTV2_NEWSLIST_CMN_20080415_114902 1124.037 1146.487
+CCTV2_NEWSLIST_CMN_20080415_114902 1146.487 1151.900
+CCTV2_NEWSLIST_CMN_20080415_114902 1162.203 1172.328
+CCTV2_NEWSLIST_CMN_20080415_114902 1175.196 1190.445
+CCTV2_NEWSLIST_CMN_20080415_114902 1190.445 1200.068
+CCTV2_NEWSLIST_CMN_20080415_114902 1200.068 1204.330
+CCTV2_NEWSLIST_CMN_20080415_114902 1204.330 1211.175
+CCTV2_NEWSLIST_CMN_20080415_114902 1211.175 1215.930
+CCTV2_NEWSLIST_CMN_20080415_114902 1215.930 1224.975
+CCTV2_NEWSLIST_CMN_20080415_114902 1224.975 1231.217
+CCTV2_NEWSLIST_CMN_20080415_114902 1231.217 1236.786
+CCTV2_NEWSLIST_CMN_20080415_114902 1236.786 1244.407
+CCTV2_NEWSLIST_CMN_20080415_114902 1244.407 1255.256
+CCTV2_NEWSLIST_CMN_20080415_114902 1255.256 1264.259
+CCTV2_NEWSLIST_CMN_20080415_114902 1320.930 1330.010
+CCTV2_NEWSLIST_CMN_20080415_114902 1330.010 1341.163
+CCTV2_NEWSLIST_CMN_20080415_114902 1341.163 1347.423
+CCTV2_NEWSLIST_CMN_20080415_114902 1347.423 1353.456
+CCTV2_NEWSLIST_CMN_20080415_114902 1353.456 1358.986
+CCTV2_NEWSLIST_CMN_20080415_114902 1358.986 1376.555
+CCTV2_NEWSLIST_CMN_20080415_114902 1376.555 1384.025
+CCTV2_NEWSLIST_CMN_20080415_114902 1394.014 1398.897
+CCTV2_NEWSLIST_CMN_20080415_114902 1398.897 1404.581
+CCTV2_NEWSLIST_CMN_20080415_114902 1404.581 1419.769
+CCTV2_NEWSLIST_CMN_20080415_114902 1427.902 1430.744
+CCTV2_NEWSLIST_CMN_20080415_114902 1430.744 1440.194
+CCTV2_NEWSLIST_CMN_20080415_114902 1440.194 1445.664
+CCTV2_NEWSLIST_CMN_20080415_114902 1445.664 1449.509
+CCTV2_NEWSLIST_CMN_20080415_114902 1449.509 1458.310
+CCTV2_NEWSLIST_CMN_20080415_114902 1458.310 1464.952
+CCTV2_NEWSLIST_CMN_20080415_114902 1464.952 1471.922
+CCTV2_NEWSLIST_CMN_20080415_114902 1471.922 1478.460
+CCTV2_NEWSLIST_CMN_20080415_114902 1494.346 1508.019
+CCTV2_NEWSLIST_CMN_20080415_114902 1508.019 1512.594
+CCTV2_NEWSLIST_CMN_20080415_114902 1520.223 1532.542
+CCTV2_NEWSLIST_CMN_20080415_114902 1540.155 1543.749
+CCTV2_NEWSLIST_CMN_20080415_114902 1543.749 1561.353
+CCTV2_NEWSLIST_CMN_20080415_114902 1561.353 1566.587
+CCTV2_NEWSLIST_CMN_20080415_114902 1572.348 1577.958
+CCTV2_NEWSLIST_CMN_20080415_114902 1577.958 1583.535
+CCTV2_NEWSLIST_CMN_20080415_114902 1583.535 1589.441
+CCTV2_NEWSLIST_CMN_20080415_114902 1589.441 1595.144
+CCTV2_NEWSLIST_CMN_20080415_114902 1595.144 1605.508
+CCTV2_NEWSLIST_CMN_20080415_114902 1605.508 1614.963
+CCTV2_NEWSLIST_CMN_20080415_114902 1614.963 1620.046
+CCTV2_NEWSLIST_CMN_20080415_114902 1620.046 1621.390
+CCTV2_NEWSLIST_CMN_20080415_114902 1621.390 1629.030
+CCTV2_NEWSLIST_CMN_20080415_114902 1629.030 1638.764
+CCTV2_NEWSLIST_CMN_20080415_114902 1643.812 1649.619
+CCTV2_NEWSLIST_CMN_20080415_114902 1649.619 1652.682
+CCTV2_NEWSLIST_CMN_20080415_114902 1652.682 1661.793
+CCTV2_NEWSLIST_CMN_20080415_114902 1661.793 1664.012
+CCTV2_NEWSLIST_CMN_20080415_114902 1664.012 1671.860
+CCTV2_NEWSLIST_CMN_20080415_114902 1671.860 1679.439
+CCTV2_NEWSLIST_CMN_20080415_114902 1679.439 1685.252
+CCTV2_NEWSLIST_CMN_20080415_114902 1685.252 1695.327
+CCTV2_NEWSLIST_CMN_20080415_114902 1708.134 1715.560
+CCTV2_NEWSLIST_CMN_20080415_114902 1715.560 1726.357
+CCTV2_NEWSLIST_CMN_20080415_114902 1726.357 1739.099
+CCTV2_NEWSLIST_CMN_20080415_114902 1739.099 1749.674
+CCTV2_NEWSLIST_CMN_20080415_114902 1749.674 1754.988
+CCTV2_NEWSLIST_CMN_20080415_114902 1754.988 1761.675
+CCTV2_NEWSLIST_CMN_20080415_114902 1761.675 1768.414
+CCTV2_NEWSLIST_CMN_20080415_114902 1768.414 1776.374
+CCTV2_NEWSLIST_CMN_20080415_114902 1776.374 1780.460
+CCTV2_NEWSLIST_CMN_20080415_114902 1780.460 1783.503
+CCTV2_NEWSLIST_CMN_20080415_114902 1783.503 1789.987
+CCTV2_NEWSLIST_CMN_20080415_114902 1789.987 1801.730
+CCTV2_NEWSLIST_CMN_20080415_114902 1801.730 1812.108
+CCTV2_NEWSLIST_CMN_20080415_114902 1812.108 1816.140
+CCTV2_NEWSLIST_CMN_20080415_114902 1816.140 1825.931
+CCTV2_NEWSLIST_CMN_20080415_114902 1825.931 1843.068
+CCTV2_NEWSLIST_CMN_20080415_114902 1843.068 1856.399
+CCTV2_NEWSLIST_CMN_20080415_114902 1856.399 1860.242
+CCTV2_NEWSLIST_CMN_20080415_114902 1860.242 1865.854
+CCTV2_NEWSLIST_CMN_20080415_114902 1865.854 1874.071
+CCTV2_NEWSLIST_CMN_20080415_114902 1874.071 1881.684
+CCTV2_NEWSLIST_CMN_20080415_114902 1881.684 1887.981
+CCTV2_NEWSLIST_CMN_20080415_114902 1887.981 1892.840
+CCTV2_NEWSLIST_CMN_20080415_114902 1892.840 1899.024
+CCTV2_NEWSLIST_CMN_20080415_114902 1899.024 1902.085
+CCTV2_NEWSLIST_CMN_20080415_114902 1955.315 1964.721
+CCTV2_NEWSLIST_CMN_20080415_114902 1964.721 1971.744
+CCTV2_NEWSLIST_CMN_20080415_114902 1971.744 1975.479
+CCTV2_NEWSLIST_CMN_20080415_114902 1995.780 2003.435
+CCTV2_NEWSLIST_CMN_20080415_114902 2019.123 2023.966
+CCTV2_NEWSLIST_CMN_20080415_114902 2023.966 2031.920
+CCTV2_NEWSLIST_CMN_20080415_114902 2031.920 2041.007
+CCTV2_NEWSLIST_CMN_20080415_114902 2041.007 2047.913
+CCTV2_NEWSLIST_CMN_20080415_114902 2047.913 2055.871
+CCTV2_NEWSLIST_CMN_20080415_114902 2055.871 2065.637
+CCTV2_NEWSLIST_CMN_20080415_114902 2084.090 2092.076
+CCTV2_NEWSLIST_CMN_20080415_114902 2092.076 2107.907
+CCTV2_NEWSLIST_CMN_20080415_114902 2107.907 2115.867
+CCTV2_NEWSLIST_CMN_20080415_114902 2115.867 2127.240
+CCTV2_NEWSLIST_CMN_20080415_114902 2127.240 2128.881
+CCTV2_NEWSLIST_CMN_20080415_114902 2141.889 2149.788
+CCTV2_NEWSLIST_CMN_20080415_114902 2158.202 2168.465
+CCTV2_NEWSLIST_CMN_20080415_114902 2168.465 2181.563
+CCTV2_NEWSLIST_CMN_20080415_114902 2181.563 2184.971
+CCTV2_NEWSLIST_CMN_20080415_114902 2184.971 2187.916
+CCTV2_NEWSLIST_CMN_20080415_114902 2196.459 2200.716
+CCTV2_NEWSLIST_CMN_20080415_114902 2208.655 2212.497
+CCTV2_NEWSLIST_CMN_20080415_114902 2212.497 2222.233
+CCTV2_NEWSLIST_CMN_20080415_114902 2232.969 2239.828
+CCTV2_NEWSLIST_CMN_20080415_114902 2239.828 2247.208
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 212.973 218.392
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 235.071 249.733
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 301.108 320.476
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 320.476 349.126
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 349.126 378.734
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 378.734 388.225
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 418.399 435.853
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 446.061 456.121
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 456.121 469.839
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 498.626 511.021
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 532.605 555.788
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 555.788 576.194
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 643.013 655.558
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 686.449 706.596
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 726.144 737.551
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 748.843 762.548
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 885.263 896.371
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 954.682 966.060
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 989.696 997.660
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1012.148 1022.118
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1022.118 1052.295
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1068.699 1082.442
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1082.442 1093.131
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1093.131 1105.287
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1105.287 1116.095
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1116.095 1133.787
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1158.381 1183.877
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1183.877 1200.015
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1200.015 1228.134
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1228.134 1241.963
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1241.963 1257.472
+CCTV2_ECON30MIN_CMN_20080426_213501 28.410 33.941
+CCTV2_ECON30MIN_CMN_20080426_213501 39.370 46.871
+CCTV2_ECON30MIN_CMN_20080426_213501 50.293 52.980
+CCTV2_ECON30MIN_CMN_20080426_213501 231.537 233.662
+CCTV2_ECON30MIN_CMN_20080426_213501 233.662 238.224
+CCTV2_ECON30MIN_CMN_20080426_213501 238.224 247.521
+CCTV2_ECON30MIN_CMN_20080426_213501 247.521 257.755
+CCTV2_ECON30MIN_CMN_20080426_213501 257.755 266.036
+CCTV2_ECON30MIN_CMN_20080426_213501 266.036 276.957
+CCTV2_ECON30MIN_CMN_20080426_213501 289.346 294.549
+CCTV2_ECON30MIN_CMN_20080426_213501 309.678 326.615
+CCTV2_ECON30MIN_CMN_20080426_213501 333.568 344.084
+CCTV2_ECON30MIN_CMN_20080426_213501 344.084 349.272
+CCTV2_ECON30MIN_CMN_20080426_213501 349.272 355.538
+CCTV2_ECON30MIN_CMN_20080426_213501 355.538 359.757
+CCTV2_ECON30MIN_CMN_20080426_213501 359.757 365.303
+CCTV2_ECON30MIN_CMN_20080426_213501 365.303 372.037
+CCTV2_ECON30MIN_CMN_20080426_213501 397.804 416.102
+CCTV2_ECON30MIN_CMN_20080426_213501 416.102 427.665
+CCTV2_ECON30MIN_CMN_20080426_213501 447.602 452.789
+CCTV2_ECON30MIN_CMN_20080426_213501 464.497 468.310
+CCTV2_ECON30MIN_CMN_20080426_213501 468.310 479.920
+CCTV2_ECON30MIN_CMN_20080426_213501 479.920 490.731
+CCTV2_ECON30MIN_CMN_20080426_213501 490.731 498.571
+CCTV2_ECON30MIN_CMN_20080426_213501 501.821 510.134
+CCTV2_ECON30MIN_CMN_20080426_213501 510.134 515.025
+CCTV2_ECON30MIN_CMN_20080426_213501 523.243 529.915
+CCTV2_ECON30MIN_CMN_20080426_213501 529.915 536.102
+CCTV2_ECON30MIN_CMN_20080426_213501 540.079 545.095
+CCTV2_ECON30MIN_CMN_20080426_213501 558.517 562.376
+CCTV2_ECON30MIN_CMN_20080426_213501 580.438 586.000
+CCTV2_ECON30MIN_CMN_20080426_213501 586.000 602.258
+CCTV2_ECON30MIN_CMN_20080426_213501 602.258 609.677
+CCTV2_ECON30MIN_CMN_20080426_213501 609.677 617.571
+CCTV2_ECON30MIN_CMN_20080426_213501 617.571 625.258
+CCTV2_ECON30MIN_CMN_20080426_213501 625.258 633.961
+CCTV2_ECON30MIN_CMN_20080426_213501 633.961 637.555
+CCTV2_ECON30MIN_CMN_20080426_213501 670.955 672.534
+CCTV2_ECON30MIN_CMN_20080426_213501 793.630 795.083
+CCTV2_ECON30MIN_CMN_20080426_213501 816.223 827.926
+CCTV2_ECON30MIN_CMN_20080426_213501 827.926 832.903
+CCTV2_ECON30MIN_CMN_20080426_213501 832.903 840.122
+CCTV2_ECON30MIN_CMN_20080426_213501 843.859 846.876
+CCTV2_ECON30MIN_CMN_20080426_213501 846.876 849.173
+CCTV2_ECON30MIN_CMN_20080426_213501 849.173 854.803
+CCTV2_ECON30MIN_CMN_20080426_213501 861.365 863.990
+CCTV2_ECON30MIN_CMN_20080426_213501 871.443 880.087
+CCTV2_ECON30MIN_CMN_20080426_213501 880.087 883.212
+CCTV2_ECON30MIN_CMN_20080426_213501 883.212 886.197
+CCTV2_ECON30MIN_CMN_20080426_213501 886.197 895.557
+CCTV2_ECON30MIN_CMN_20080426_213501 895.557 900.979
+CCTV2_ECON30MIN_CMN_20080426_213501 900.979 908.010
+CCTV2_ECON30MIN_CMN_20080426_213501 908.010 914.822
+CCTV2_ECON30MIN_CMN_20080426_213501 914.822 923.994
+CCTV2_ECON30MIN_CMN_20080426_213501 923.994 928.182
+CCTV2_ECON30MIN_CMN_20080426_213501 928.182 933.947
+CCTV2_ECON30MIN_CMN_20080426_213501 938.315 941.189
+CCTV2_ECON30MIN_CMN_20080426_213501 941.189 942.954
+CCTV2_ECON30MIN_CMN_20080426_213501 942.954 947.157
+CCTV2_ECON30MIN_CMN_20080426_213501 942.954 947.157
+CCTV2_ECON30MIN_CMN_20080426_213501 947.157 952.610
+CCTV2_ECON30MIN_CMN_20080426_213501 963.970 967.298
+CCTV2_ECON30MIN_CMN_20080426_213501 967.298 968.657
+CCTV2_ECON30MIN_CMN_20080426_213501 968.657 969.720
+CCTV2_ECON30MIN_CMN_20080426_213501 969.720 971.939
+CCTV2_ECON30MIN_CMN_20080426_213501 979.225 996.490
+CCTV2_ECON30MIN_CMN_20080426_213501 996.490 1001.599
+CCTV2_ECON30MIN_CMN_20080426_213501 1001.599 1012.756
+CCTV2_ECON30MIN_CMN_20080426_213501 1012.756 1019.584
+CCTV2_ECON30MIN_CMN_20080426_213501 1025.537 1033.631
+CCTV2_ECON30MIN_CMN_20080426_213501 1042.006 1046.849
+CCTV2_ECON30MIN_CMN_20080426_213501 1056.069 1066.585
+CCTV2_ECON30MIN_CMN_20080426_213501 1080.184 1093.482
+CCTV2_ECON30MIN_CMN_20080426_213501 1096.039 1114.914
+CCTV2_ECON30MIN_CMN_20080426_213501 1123.039 1132.914
+CCTV2_ECON30MIN_CMN_20080426_213501 1148.338 1159.682
+CCTV2_ECON30MIN_CMN_20080426_213501 1159.682 1167.212
+CCTV2_ECON30MIN_CMN_20080426_213501 1167.212 1170.946
+CCTV2_ECON30MIN_CMN_20080426_213501 1172.555 1173.212
+CCTV2_ECON30MIN_CMN_20080426_213501 1173.212 1180.814
+CCTV2_ECON30MIN_CMN_20080426_213501 1180.814 1186.893
+CCTV2_ECON30MIN_CMN_20080426_213501 1189.456 1200.659
+CCTV2_ECON30MIN_CMN_20080426_213501 1200.659 1211.393
+CCTV2_ECON30MIN_CMN_20080426_213501 1211.393 1229.254
+CCTV2_ECON30MIN_CMN_20080426_213501 1363.474 1364.771
+CCTV2_ECON30MIN_CMN_20080426_213501 1364.771 1374.880
+CCTV2_ECON30MIN_CMN_20080426_213501 1374.880 1382.661
+CCTV2_ECON30MIN_CMN_20080426_213501 1382.661 1392.458
+CCTV2_ECON30MIN_CMN_20080426_213501 1392.458 1409.020
+CCTV2_ECON30MIN_CMN_20080426_213501 1422.069 1431.476
+CCTV2_ECON30MIN_CMN_20080426_213501 1436.079 1437.594
+CCTV2_ECON30MIN_CMN_20080426_213501 1441.390 1449.000
+CCTV2_ECON30MIN_CMN_20080426_213501 1449.000 1456.985
+CCTV2_ECON30MIN_CMN_20080426_213501 1465.672 1486.121
+CCTV2_ECON30MIN_CMN_20080426_213501 1498.824 1508.243
+CCTV2_ECON30MIN_CMN_20080426_213501 1508.243 1514.914
+CCTV2_ECON30MIN_CMN_20080426_213501 1533.164 1544.664
+CCTV2_ECON30MIN_CMN_20080426_213501 1544.664 1562.154
+CCTV2_ECON30MIN_CMN_20080426_213501 1562.154 1580.066
+CCTV2_ECON30MIN_CMN_20080426_213501 1580.066 1592.035
+CCTV2_ECON30MIN_CMN_20080426_213501 1606.364 1620.586
+VOA_INTNLNEWS_CMN_20080407_210000 239.183 245.480
+VOA_INTNLNEWS_CMN_20080407_210000 245.480 247.152
+VOA_INTNLNEWS_CMN_20080407_210000 247.152 253.550
+VOA_INTNLNEWS_CMN_20080407_210000 253.550 257.726
+VOA_INTNLNEWS_CMN_20080407_210000 257.726 262.127
+VOA_INTNLNEWS_CMN_20080407_210000 268.106 273.032
+VOA_INTNLNEWS_CMN_20080407_210000 283.870 286.980
+VOA_INTNLNEWS_CMN_20080407_210000 286.980 291.308
+VOA_INTNLNEWS_CMN_20080407_210000 291.308 292.933
+VOA_INTNLNEWS_CMN_20080407_210000 292.933 302.099
+VOA_INTNLNEWS_CMN_20080407_210000 302.099 308.238
+VOA_INTNLNEWS_CMN_20080407_210000 316.189 318.830
+VOA_INTNLNEWS_CMN_20080407_210000 318.830 329.857
+VOA_INTNLNEWS_CMN_20080407_210000 329.857 347.078
+VOA_INTNLNEWS_CMN_20080407_210000 358.454 365.466
+VOA_INTNLNEWS_CMN_20080407_210000 374.472 378.239
+VOA_INTNLNEWS_CMN_20080407_210000 378.239 384.946
+VOA_INTNLNEWS_CMN_20080407_210000 384.946 390.880
+VOA_INTNLNEWS_CMN_20080407_210000 390.880 401.736
+VOA_INTNLNEWS_CMN_20080407_210000 416.156 422.324
+VOA_INTNLNEWS_CMN_20080407_210000 431.371 438.478
+VOA_INTNLNEWS_CMN_20080407_210000 438.478 443.602
+VOA_INTNLNEWS_CMN_20080407_210000 460.181 464.403
+VOA_INTNLNEWS_CMN_20080407_210000 464.403 473.719
+VOA_INTNLNEWS_CMN_20080407_210000 483.454 487.642
+VOA_INTNLNEWS_CMN_20080407_210000 487.642 491.330
+VOA_INTNLNEWS_CMN_20080407_210000 498.190 505.221
+VOA_INTNLNEWS_CMN_20080407_210000 529.783 542.517
+VOA_INTNLNEWS_CMN_20080407_210000 542.517 550.851
+VOA_INTNLNEWS_CMN_20080407_210000 558.289 568.778
+VOA_INTNLNEWS_CMN_20080407_210000 568.778 576.466
+VOA_INTNLNEWS_CMN_20080407_210000 576.466 585.133
+VOA_INTNLNEWS_CMN_20080407_210000 585.133 588.517
+VOA_INTNLNEWS_CMN_20080407_210000 596.631 602.859
+VOA_INTNLNEWS_CMN_20080407_210000 602.859 608.984
+VOA_INTNLNEWS_CMN_20080407_210000 608.984 611.889
+VOA_INTNLNEWS_CMN_20080407_210000 619.451 629.780
+VOA_INTNLNEWS_CMN_20080407_210000 629.780 637.812
+VOA_INTNLNEWS_CMN_20080407_210000 637.812 643.069
+VOA_INTNLNEWS_CMN_20080407_210000 643.069 651.321
+VOA_INTNLNEWS_CMN_20080407_210000 666.838 672.101
+VOA_INTNLNEWS_CMN_20080407_210000 672.101 676.726
+VOA_INTNLNEWS_CMN_20080407_210000 680.382 707.262
+VOA_INTNLNEWS_CMN_20080407_210000 707.262 719.937
+VOA_INTNLNEWS_CMN_20080407_210000 719.937 723.671
+VOA_INTNLNEWS_CMN_20080407_210000 723.671 737.076
+VOA_INTNLNEWS_CMN_20080407_210000 737.076 741.295
+VOA_INTNLNEWS_CMN_20080407_210000 755.427 763.332
+VOA_INTNLNEWS_CMN_20080407_210000 773.364 782.447
+VOA_INTNLNEWS_CMN_20080407_210000 782.447 791.792
+VOA_INTNLNEWS_CMN_20080407_210000 805.239 807.598
+VOA_INTNLNEWS_CMN_20080407_210000 807.598 814.402
+VOA_INTNLNEWS_CMN_20080407_210000 814.402 821.717
+VOA_INTNLNEWS_CMN_20080407_210000 821.717 829.388
+VOA_INTNLNEWS_CMN_20080407_210000 841.458 850.089
+VOA_INTNLNEWS_CMN_20080407_210000 850.089 858.083
+VOA_INTNLNEWS_CMN_20080407_210000 874.398 883.260
+VOA_INTNLNEWS_CMN_20080407_210000 883.260 888.318
+VOA_INTNLNEWS_CMN_20080407_210000 888.318 899.500
+VOA_INTNLNEWS_CMN_20080407_210000 899.500 908.781
+VOA_INTNLNEWS_CMN_20080407_210000 908.781 911.891
+VOA_INTNLNEWS_CMN_20080407_210000 921.144 927.503
+VOA_INTNLNEWS_CMN_20080407_210000 927.503 929.784
+VOA_INTNLNEWS_CMN_20080407_210000 929.784 935.344
+VOA_INTNLNEWS_CMN_20080407_210000 939.610 940.938
+VOA_INTNLNEWS_CMN_20080407_210000 940.938 954.642
+VOA_INTNLNEWS_CMN_20080407_210000 954.642 960.000
+CCTV1_30MINNEWS_CMN_20080401_115901 85.222 89.924
+CCTV1_30MINNEWS_CMN_20080401_115901 89.924 95.829
+CCTV1_30MINNEWS_CMN_20080401_115901 95.829 97.422
+CCTV1_30MINNEWS_CMN_20080401_115901 102.422 104.000
+CCTV1_30MINNEWS_CMN_20080401_115901 104.000 105.875
+CCTV1_30MINNEWS_CMN_20080401_115901 109.032 118.016
+CCTV1_30MINNEWS_CMN_20080401_115901 118.016 126.157
+CCTV1_30MINNEWS_CMN_20080401_115901 136.095 145.486
+CCTV1_30MINNEWS_CMN_20080401_115901 145.486 150.205
+CCTV1_30MINNEWS_CMN_20080401_115901 150.205 154.564
+CCTV1_30MINNEWS_CMN_20080401_115901 154.564 163.595
+CCTV1_30MINNEWS_CMN_20080401_115901 163.595 170.470
+CCTV1_30MINNEWS_CMN_20080401_115901 170.470 188.345
+CCTV1_30MINNEWS_CMN_20080401_115901 188.345 198.344
+CCTV1_30MINNEWS_CMN_20080401_115901 203.819 213.616
+CCTV1_30MINNEWS_CMN_20080401_115901 220.490 232.554
+CCTV1_30MINNEWS_CMN_20080401_115901 246.908 250.142
+CCTV1_30MINNEWS_CMN_20080401_115901 250.142 255.127
+CCTV1_30MINNEWS_CMN_20080401_115901 302.818 317.366
+CCTV1_30MINNEWS_CMN_20080401_115901 317.366 327.835
+CCTV1_30MINNEWS_CMN_20080401_115901 327.835 343.663
+CCTV1_30MINNEWS_CMN_20080401_115901 343.663 363.819
+CCTV1_30MINNEWS_CMN_20080401_115901 363.819 371.726
+CCTV1_30MINNEWS_CMN_20080401_115901 378.585 392.147
+CCTV1_30MINNEWS_CMN_20080401_115901 408.912 413.584
+CCTV1_30MINNEWS_CMN_20080401_115901 419.818 425.162
+CCTV1_30MINNEWS_CMN_20080401_115901 456.905 466.562
+CCTV1_30MINNEWS_CMN_20080401_115901 466.562 470.655
+CCTV1_30MINNEWS_CMN_20080401_115901 529.681 534.759
+CCTV1_30MINNEWS_CMN_20080401_115901 547.275 559.868
+CCTV1_30MINNEWS_CMN_20080401_115901 565.837 571.415
+CCTV1_30MINNEWS_CMN_20080401_115901 571.415 577.602
+CCTV1_30MINNEWS_CMN_20080401_115901 577.602 585.805
+CCTV1_30MINNEWS_CMN_20080401_115901 585.805 597.804
+CCTV1_30MINNEWS_CMN_20080401_115901 597.804 607.071
+CCTV1_30MINNEWS_CMN_20080401_115901 607.071 616.259
+CCTV1_30MINNEWS_CMN_20080401_115901 624.491 626.413
+CCTV1_30MINNEWS_CMN_20080401_115901 636.069 645.226
+CCTV1_30MINNEWS_CMN_20080401_115901 645.226 649.758
+CCTV1_30MINNEWS_CMN_20080401_115901 649.758 659.382
+CCTV1_30MINNEWS_CMN_20080401_115901 659.382 671.784
+CCTV1_30MINNEWS_CMN_20080401_115901 671.784 677.556
+CCTV1_30MINNEWS_CMN_20080401_115901 677.556 684.025
+CCTV1_30MINNEWS_CMN_20080401_115901 707.748 713.936
+CCTV1_30MINNEWS_CMN_20080401_115901 713.936 718.764
+CCTV1_30MINNEWS_CMN_20080401_115901 723.544 731.932
+CCTV1_30MINNEWS_CMN_20080401_115901 731.932 744.026
+CCTV1_30MINNEWS_CMN_20080401_115901 744.026 754.933
+CCTV1_30MINNEWS_CMN_20080401_115901 754.933 764.152
+CCTV1_30MINNEWS_CMN_20080401_115901 764.152 773.428
+CCTV1_30MINNEWS_CMN_20080401_115901 820.385 831.979
+CCTV1_30MINNEWS_CMN_20080401_115901 841.400 851.994
+CCTV1_30MINNEWS_CMN_20080401_115901 851.994 862.322
+CCTV1_30MINNEWS_CMN_20080401_115901 862.322 874.791
+CCTV1_30MINNEWS_CMN_20080401_115901 874.791 885.995
+CCTV1_30MINNEWS_CMN_20080401_115901 885.995 889.370
+CCTV1_30MINNEWS_CMN_20080401_115901 892.855 901.621
+CCTV1_30MINNEWS_CMN_20080401_115901 911.893 914.710
+CCTV1_30MINNEWS_CMN_20080401_115901 911.893 914.710
+CCTV1_30MINNEWS_CMN_20080401_115901 914.710 917.397
+CCTV1_30MINNEWS_CMN_20080401_115901 917.397 924.053
+CCTV1_30MINNEWS_CMN_20080401_115901 931.038 936.263
+CCTV1_30MINNEWS_CMN_20080401_115901 931.038 936.263
+CCTV1_30MINNEWS_CMN_20080401_115901 936.263 939.179
+CCTV1_30MINNEWS_CMN_20080401_115901 939.179 945.444
+CCTV1_30MINNEWS_CMN_20080401_115901 945.444 947.240
+CCTV1_30MINNEWS_CMN_20080401_115901 947.240 953.901
+CCTV1_30MINNEWS_CMN_20080401_115901 953.901 963.386
+CCTV1_30MINNEWS_CMN_20080401_115901 963.386 968.344
+CCTV1_30MINNEWS_CMN_20080401_115901 968.344 970.704
+CCTV1_30MINNEWS_CMN_20080401_115901 970.704 978.282
+CCTV1_30MINNEWS_CMN_20080401_115901 978.282 988.954
+CCTV1_30MINNEWS_CMN_20080401_115901 1005.032 1013.407
+CCTV1_30MINNEWS_CMN_20080401_115901 1013.407 1024.837
+CCTV1_30MINNEWS_CMN_20080401_115901 1024.837 1034.786
+CCTV1_30MINNEWS_CMN_20080401_115901 1034.786 1040.630
+CCTV1_30MINNEWS_CMN_20080401_115901 1040.630 1046.676
+CCTV1_30MINNEWS_CMN_20080401_115901 1046.676 1056.661
+CCTV1_30MINNEWS_CMN_20080401_115901 1069.892 1075.122
+CCTV1_30MINNEWS_CMN_20080401_115901 1075.122 1079.746
+CCTV1_30MINNEWS_CMN_20080401_115901 1079.746 1091.308
+CCTV1_30MINNEWS_CMN_20080401_115901 1091.308 1098.371
+CCTV1_30MINNEWS_CMN_20080401_115901 1098.371 1101.075
+CCTV1_30MINNEWS_CMN_20080401_115901 1101.075 1108.388
+CCTV1_30MINNEWS_CMN_20080401_115901 1108.388 1119.623
+CCTV1_30MINNEWS_CMN_20080401_115901 1119.623 1126.187
+CCTV1_30MINNEWS_CMN_20080401_115901 1140.828 1146.876
+CCTV1_30MINNEWS_CMN_20080401_115901 1267.391 1269.171
+CCTV1_30MINNEWS_CMN_20080401_115901 1282.216 1294.325
+CCTV1_30MINNEWS_CMN_20080401_115901 1294.325 1307.232
+CCTV1_30MINNEWS_CMN_20080401_115901 1313.482 1318.857
+CCTV1_30MINNEWS_CMN_20080401_115901 1342.153 1349.573
+CCTV1_30MINNEWS_CMN_20080401_115901 1349.573 1364.808
+CCTV1_30MINNEWS_CMN_20080401_115901 1364.808 1376.246
+CCTV1_30MINNEWS_CMN_20080401_115901 1376.246 1387.886
+CCTV1_30MINNEWS_CMN_20080401_115901 1410.499 1418.593
+CCTV1_30MINNEWS_CMN_20080401_115901 1418.593 1426.749
+CCTV1_30MINNEWS_CMN_20080401_115901 1426.749 1431.161
+CCTV1_30MINNEWS_CMN_20080401_115901 1443.590 1450.496
+CCTV1_30MINNEWS_CMN_20080401_115901 1450.496 1465.137
+CCTV1_30MINNEWS_CMN_20080401_115901 1465.137 1467.012
+CCTV1_30MINNEWS_CMN_20080401_115901 1467.012 1482.019
+CCTV1_30MINNEWS_CMN_20080401_115901 1489.660 1491.582
+CCTV1_30MINNEWS_CMN_20080401_115901 1491.582 1504.181
+CCTV1_30MINNEWS_CMN_20080401_115901 1519.757 1522.929
+CCTV1_30MINNEWS_CMN_20080401_115901 1534.820 1544.318
+CCTV1_30MINNEWS_CMN_20080401_115901 1568.481 1580.521
+CCTV1_30MINNEWS_CMN_20080401_115901 1580.521 1592.336
+CCTV1_30MINNEWS_CMN_20080401_115901 1592.336 1603.430
+CCTV1_30MINNEWS_CMN_20080401_115901 1603.430 1612.086
+CCTV1_30MINNEWS_CMN_20080401_115901 1612.086 1620.405
+CCTV1_30MINNEWS_CMN_20080401_115901 1620.405 1634.986
+CCTV1_30MINNEWS_CMN_20080401_115901 1840.741 1843.569
+CCTV1_30MINNEWS_CMN_20080401_115901 1844.929 1845.460
+CCTV2_ECON30MIN_CMN_20080411_213502 226.331 228.722
+CCTV2_ECON30MIN_CMN_20080411_213502 228.722 252.270
+CCTV2_ECON30MIN_CMN_20080411_213502 252.270 260.640
+CCTV2_ECON30MIN_CMN_20080411_213502 260.640 266.796
+CCTV2_ECON30MIN_CMN_20080411_213502 266.796 270.624
+CCTV2_ECON30MIN_CMN_20080411_213502 270.624 273.249
+CCTV2_ECON30MIN_CMN_20080411_213502 273.249 289.630
+CCTV2_ECON30MIN_CMN_20080411_213502 289.630 292.248
+CCTV2_ECON30MIN_CMN_20080411_213502 292.248 317.922
+CCTV2_ECON30MIN_CMN_20080411_213502 317.922 324.940
+CCTV2_ECON30MIN_CMN_20080411_213502 324.940 328.652
+CCTV2_ECON30MIN_CMN_20080411_213502 328.652 330.839
+CCTV2_ECON30MIN_CMN_20080411_213502 330.839 337.152
+CCTV2_ECON30MIN_CMN_20080411_213502 337.152 352.512
+CCTV2_ECON30MIN_CMN_20080411_213502 352.512 370.100
+CCTV2_ECON30MIN_CMN_20080411_213502 370.100 378.838
+CCTV2_ECON30MIN_CMN_20080411_213502 378.838 387.213
+CCTV2_ECON30MIN_CMN_20080411_213502 387.213 402.945
+CCTV2_ECON30MIN_CMN_20080411_213502 414.951 419.264
+CCTV2_ECON30MIN_CMN_20080411_213502 419.264 436.465
+CCTV2_ECON30MIN_CMN_20080411_213502 436.465 459.294
+CCTV2_ECON30MIN_CMN_20080411_213502 459.294 469.013
+CCTV2_ECON30MIN_CMN_20080411_213502 469.013 474.887
+CCTV2_ECON30MIN_CMN_20080411_213502 474.887 484.437
+CCTV2_ECON30MIN_CMN_20080411_213502 484.437 492.967
+CCTV2_ECON30MIN_CMN_20080411_213502 504.045 511.576
+CCTV2_ECON30MIN_CMN_20080411_213502 511.576 525.608
+CCTV2_ECON30MIN_CMN_20080411_213502 525.608 529.438
+CCTV2_ECON30MIN_CMN_20080411_213502 529.438 539.031
+CCTV2_ECON30MIN_CMN_20080411_213502 539.031 553.022
+CCTV2_ECON30MIN_CMN_20080411_213502 563.708 572.630
+CCTV2_ECON30MIN_CMN_20080411_213502 572.630 585.004
+CCTV2_ECON30MIN_CMN_20080411_213502 585.004 600.690
+CCTV2_ECON30MIN_CMN_20080411_213502 600.690 609.373
+CCTV2_ECON30MIN_CMN_20080411_213502 609.373 614.607
+CCTV2_ECON30MIN_CMN_20080411_213502 614.607 624.398
+CCTV2_ECON30MIN_CMN_20080411_213502 630.178 640.601
+CCTV2_ECON30MIN_CMN_20080411_213502 640.601 646.211
+CCTV2_ECON30MIN_CMN_20080411_213502 646.211 658.909
+CCTV2_ECON30MIN_CMN_20080411_213502 658.909 668.457
+CCTV2_ECON30MIN_CMN_20080411_213502 668.457 674.442
+CCTV2_ECON30MIN_CMN_20080411_213502 674.442 697.143
+CCTV2_ECON30MIN_CMN_20080411_213502 697.143 703.089
+CCTV2_ECON30MIN_CMN_20080411_213502 703.089 716.961
+CCTV2_ECON30MIN_CMN_20080411_213502 716.961 732.134
+CCTV2_ECON30MIN_CMN_20080411_213502 732.134 749.540
+CCTV2_ECON30MIN_CMN_20080411_213502 749.540 761.493
+CCTV2_ECON30MIN_CMN_20080411_213502 761.493 770.366
+CCTV2_ECON30MIN_CMN_20080411_213502 770.366 778.417
+CCTV2_ECON30MIN_CMN_20080411_213502 926.746 937.355
+CCTV2_ECON30MIN_CMN_20080411_213502 946.260 967.308
+CCTV2_ECON30MIN_CMN_20080411_213502 967.308 976.011
+CCTV2_ECON30MIN_CMN_20080411_213502 976.011 985.306
+CCTV2_ECON30MIN_CMN_20080411_213502 985.306 987.754
+CCTV2_ECON30MIN_CMN_20080411_213502 987.754 990.176
+CCTV2_ECON30MIN_CMN_20080411_213502 993.364 996.443
+CCTV2_ECON30MIN_CMN_20080411_213502 996.443 1014.180
+CCTV2_ECON30MIN_CMN_20080411_213502 1014.180 1018.570
+CCTV2_ECON30MIN_CMN_20080411_213502 1018.570 1023.804
+CCTV2_ECON30MIN_CMN_20080411_213502 1023.804 1034.678
+CCTV2_ECON30MIN_CMN_20080411_213502 1034.678 1039.300
+CCTV2_ECON30MIN_CMN_20080411_213502 1039.300 1047.890
+CCTV2_ECON30MIN_CMN_20080411_213502 1047.890 1056.781
+CCTV2_ECON30MIN_CMN_20080411_213502 1056.781 1066.354
+CCTV2_ECON30MIN_CMN_20080411_213502 1079.666 1085.854
+CCTV2_ECON30MIN_CMN_20080411_213502 1085.854 1100.354
+CCTV2_ECON30MIN_CMN_20080411_213502 1100.354 1112.058
+CCTV2_ECON30MIN_CMN_20080411_213502 1112.058 1118.740
+CCTV2_ECON30MIN_CMN_20080411_213502 1118.740 1137.460
+CCTV2_ECON30MIN_CMN_20080411_213502 1137.460 1143.819
+CCTV2_ECON30MIN_CMN_20080411_213502 1160.526 1166.682
+CCTV2_ECON30MIN_CMN_20080411_213502 1166.682 1176.721
+CCTV2_ECON30MIN_CMN_20080411_213502 1182.048 1195.365
+CCTV2_ECON30MIN_CMN_20080411_213502 1195.365 1202.993
+CCTV2_ECON30MIN_CMN_20080411_213502 1202.993 1210.688
+CCTV2_ECON30MIN_CMN_20080411_213502 1383.569 1402.834
+CCTV2_ECON30MIN_CMN_20080411_213502 1402.834 1411.302
+CCTV2_ECON30MIN_CMN_20080411_213502 1470.714 1477.370
+CCTV2_ECON30MIN_CMN_20080411_213502 1477.370 1501.710
+CCTV2_ECON30MIN_CMN_20080411_213502 1501.710 1510.116
+CCTV2_ECON30MIN_CMN_20080411_213502 1510.116 1535.443
+CCTV2_ECON30MIN_CMN_20080411_213502 1535.443 1562.162
+CCTV2_ECON30MIN_CMN_20080411_213502 1562.162 1567.240
+CCTV2_ECON30MIN_CMN_20080411_213502 1589.601 1597.726
+CCTV2_ECON30MIN_CMN_20080411_213502 1597.726 1606.148
+CCTV2_ECON30MIN_CMN_20080411_213502 1606.148 1614.374
+CCTV2_ECON30MIN_CMN_20080411_213502 1614.374 1620.586
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 234.403 240.551
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 240.551 242.238
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 242.238 250.802
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 250.802 256.965
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 256.965 262.048
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 262.048 264.220
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 284.799 294.158
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 301.192 311.661
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 311.661 323.645
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 343.659 348.395
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 348.395 356.395
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 356.395 364.144
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 364.144 368.363
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 385.957 393.097
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 399.965 409.148
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 413.070 424.258
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 460.726 471.350
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 471.350 481.348
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 489.202 493.732
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 493.732 500.089
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 500.089 510.916
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 510.916 520.229
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 520.229 531.307
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 542.041 553.788
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 566.473 577.397
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 585.944 589.741
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 589.741 600.400
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 609.951 619.279
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 619.279 627.451
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 632.717 640.046
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 640.046 652.514
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 652.514 658.607
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 658.607 668.138
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 668.138 673.029
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 673.029 675.888
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 675.888 685.277
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 695.403 700.793
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 700.793 714.278
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 714.278 725.465
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 746.903 758.388
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 768.904 775.982
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 775.982 790.061
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 800.358 806.921
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 820.892 825.741
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 825.741 838.585
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 838.585 842.990
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 874.229 883.414
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 883.414 891.695
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 891.695 896.804
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 910.211 920.194
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 983.712 996.213
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1007.869 1030.963
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1030.963 1042.526
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1042.526 1059.981
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1059.981 1063.090
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1074.863 1087.003
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1087.003 1097.775
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1110.949 1115.028
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1115.028 1127.872
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1139.673 1148.751
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1148.751 1154.611
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1154.611 1161.048
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1161.048 1169.095
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1169.095 1178.980
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1178.980 1183.199
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1207.789 1220.694
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1230.441 1240.081
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1250.941 1259.676
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1259.676 1274.550
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1277.191 1286.473
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1292.364 1301.536
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1314.103 1329.396
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1329.396 1340.288
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1350.351 1356.914
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1367.806 1382.602
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1387.087 1392.603
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1392.603 1395.415
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2068.093 2069.921
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2069.921 2078.609
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2089.484 2098.468
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2115.561 2127.229
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2146.852 2151.586
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2151.586 2159.102
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2159.102 2166.461
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2166.461 2171.180
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2189.039 2196.508
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2196.508 2202.852
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2202.852 2212.227
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2212.227 2215.696
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2215.696 2226.915
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2226.915 2238.087
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2250.040 2256.743
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2256.743 2262.649
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2262.649 2272.774
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2272.774 2283.040
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2283.040 2290.884
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2296.352 2304.415
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2304.415 2310.134
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2316.899 2320.621
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2320.621 2322.903
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2322.903 2324.497
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2329.137 2344.855
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2344.855 2351.417
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2351.417 2359.730
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2359.730 2372.872
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2372.872 2382.091
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2391.154 2396.357
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2396.357 2401.403
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2401.403 2414.996
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2425.667 2433.544
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2437.184 2444.524
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2444.524 2457.757
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2457.757 2480.039
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2480.039 2491.806
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2521.898 2534.353
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2534.353 2539.103
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2539.103 2544.055
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2550.086 2561.726
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2561.726 2568.085
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2568.085 2572.866
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2572.866 2591.135
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2591.135 2605.054
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2605.054 2611.399
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2611.399 2617.197
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2617.197 2623.411
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2646.654 2663.341
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2677.645 2697.408
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2697.408 2701.424
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2701.424 2707.034
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2721.551 2739.751
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2739.751 2749.853
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2749.853 2768.477
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2768.477 2771.836
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2781.421 2788.695
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2788.695 2799.925
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2799.925 2808.614
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2822.424 2842.392
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2915.220 2924.248
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2939.951 2947.748
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2947.748 2954.068
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2961.457 2970.768
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2970.768 2985.440
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2985.440 2993.425
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3037.420 3049.810
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3049.810 3057.092
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3057.092 3067.842
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3067.842 3087.422
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3107.811 3124.326
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3158.343 3169.749
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3169.749 3196.089
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3203.200 3207.825
+CCTV1_30MINNEWS_CMN_20080328_115902 73.585 78.148
+CCTV1_30MINNEWS_CMN_20080328_115902 89.847 97.422
+CCTV1_30MINNEWS_CMN_20080328_115902 97.422 98.704
+CCTV1_30MINNEWS_CMN_20080328_115902 101.281 102.766
+CCTV1_30MINNEWS_CMN_20080328_115902 102.766 105.297
+CCTV1_30MINNEWS_CMN_20080328_115902 147.171 160.264
+CCTV1_30MINNEWS_CMN_20080328_115902 160.264 170.358
+CCTV1_30MINNEWS_CMN_20080328_115902 175.546 183.406
+CCTV1_30MINNEWS_CMN_20080328_115902 215.554 226.321
+CCTV1_30MINNEWS_CMN_20080328_115902 226.321 236.103
+CCTV1_30MINNEWS_CMN_20080328_115902 236.103 244.665
+CCTV1_30MINNEWS_CMN_20080328_115902 264.945 275.477
+CCTV1_30MINNEWS_CMN_20080328_115902 275.477 294.895
+CCTV1_30MINNEWS_CMN_20080328_115902 311.243 323.868
+CCTV1_30MINNEWS_CMN_20080328_115902 323.868 331.415
+CCTV1_30MINNEWS_CMN_20080328_115902 358.102 363.400
+CCTV1_30MINNEWS_CMN_20080328_115902 363.400 374.715
+CCTV1_30MINNEWS_CMN_20080328_115902 374.715 379.387
+CCTV1_30MINNEWS_CMN_20080328_115902 379.387 398.543
+CCTV1_30MINNEWS_CMN_20080328_115902 398.543 408.597
+CCTV1_30MINNEWS_CMN_20080328_115902 408.597 423.112
+CCTV1_30MINNEWS_CMN_20080328_115902 439.284 450.939
+CCTV1_30MINNEWS_CMN_20080328_115902 450.939 460.580
+CCTV1_30MINNEWS_CMN_20080328_115902 460.580 465.071
+CCTV1_30MINNEWS_CMN_20080328_115902 492.309 508.319
+CCTV1_30MINNEWS_CMN_20080328_115902 508.319 526.023
+CCTV1_30MINNEWS_CMN_20080328_115902 526.023 533.242
+CCTV1_30MINNEWS_CMN_20080328_115902 533.242 541.180
+CCTV1_30MINNEWS_CMN_20080328_115902 545.978 549.009
+CCTV1_30MINNEWS_CMN_20080328_115902 555.837 561.697
+CCTV1_30MINNEWS_CMN_20080328_115902 587.590 590.840
+CCTV1_30MINNEWS_CMN_20080328_115902 663.955 675.572
+CCTV1_30MINNEWS_CMN_20080328_115902 703.216 716.623
+CCTV1_30MINNEWS_CMN_20080328_115902 770.585 781.493
+CCTV1_30MINNEWS_CMN_20080328_115902 792.062 798.172
+CCTV1_30MINNEWS_CMN_20080328_115902 798.172 808.082
+CCTV1_30MINNEWS_CMN_20080328_115902 808.082 827.097
+CCTV1_30MINNEWS_CMN_20080328_115902 827.097 836.316
+CCTV1_30MINNEWS_CMN_20080328_115902 836.316 845.067
+CCTV1_30MINNEWS_CMN_20080328_115902 845.067 850.661
+CCTV1_30MINNEWS_CMN_20080328_115902 850.661 859.928
+CCTV1_30MINNEWS_CMN_20080328_115902 859.928 869.332
+CCTV1_30MINNEWS_CMN_20080328_115902 869.332 877.643
+CCTV1_30MINNEWS_CMN_20080328_115902 893.080 903.055
+CCTV1_30MINNEWS_CMN_20080328_115902 903.055 910.477
+CCTV1_30MINNEWS_CMN_20080328_115902 910.477 922.368
+CCTV1_30MINNEWS_CMN_20080328_115902 1038.231 1048.715
+CCTV1_30MINNEWS_CMN_20080328_115902 1048.715 1057.403
+CCTV1_30MINNEWS_CMN_20080328_115902 1065.105 1073.324
+CCTV1_30MINNEWS_CMN_20080328_115902 1073.324 1080.652
+CCTV1_30MINNEWS_CMN_20080328_115902 1103.562 1111.328
+CCTV1_30MINNEWS_CMN_20080328_115902 1122.832 1133.020
+CCTV1_30MINNEWS_CMN_20080328_115902 1133.020 1135.973
+CCTV1_30MINNEWS_CMN_20080328_115902 1178.629 1185.410
+CCTV1_30MINNEWS_CMN_20080328_115902 1185.410 1189.379
+CCTV1_30MINNEWS_CMN_20080328_115902 1196.254 1201.832
+CCTV1_30MINNEWS_CMN_20080328_115902 1201.832 1206.988
+CCTV1_30MINNEWS_CMN_20080328_115902 1235.681 1238.947
+CCTV1_30MINNEWS_CMN_20080328_115902 1243.041 1255.108
+CCTV1_30MINNEWS_CMN_20080328_115902 1255.108 1269.918
+CCTV1_30MINNEWS_CMN_20080328_115902 1269.918 1282.816
+CCTV1_30MINNEWS_CMN_20080328_115902 1295.253 1305.726
+CCTV1_30MINNEWS_CMN_20080328_115902 1305.726 1320.361
+CCTV1_30MINNEWS_CMN_20080328_115902 1320.361 1336.776
+CCTV1_30MINNEWS_CMN_20080328_115902 1371.446 1377.617
+CCTV1_30MINNEWS_CMN_20080328_115902 1377.617 1393.383
+CCTV1_30MINNEWS_CMN_20080328_115902 1393.383 1402.274
+CCTV1_30MINNEWS_CMN_20080328_115902 1402.274 1414.181
+CCTV1_30MINNEWS_CMN_20080328_115902 1414.181 1423.133
+CCTV1_30MINNEWS_CMN_20080328_115902 1430.368 1437.976
+CCTV1_30MINNEWS_CMN_20080328_115902 1437.976 1442.397
+CCTV1_30MINNEWS_CMN_20080328_115902 1458.631 1466.991
+CCTV1_30MINNEWS_CMN_20080328_115902 1466.991 1481.444
+CCTV1_30MINNEWS_CMN_20080328_115902 1481.444 1489.477
+CCTV1_30MINNEWS_CMN_20080328_115902 1489.477 1500.758
+CCTV1_30MINNEWS_CMN_20080328_115902 1500.758 1505.977
+CCTV1_30MINNEWS_CMN_20080328_115902 1505.977 1522.180
+CCTV1_30MINNEWS_CMN_20080328_115902 1522.180 1525.618
+CCTV1_30MINNEWS_CMN_20080328_115902 1525.618 1538.791
+CCTV1_30MINNEWS_CMN_20080328_115902 1538.791 1551.387
+CCTV1_30MINNEWS_CMN_20080328_115902 1559.528 1562.582
+CCTV1_30MINNEWS_CMN_20080328_115902 1562.582 1567.754
+CCTV1_30MINNEWS_CMN_20080328_115902 1567.754 1576.798
+CCTV1_30MINNEWS_CMN_20080328_115902 1580.954 1585.971
+CCTV1_30MINNEWS_CMN_20080328_115902 1589.988 1597.159
+CCTV1_30MINNEWS_CMN_20080328_115902 1597.159 1601.471
+CCTV1_30MINNEWS_CMN_20080328_115902 1601.471 1604.846
+CCTV1_30MINNEWS_CMN_20080328_115902 1617.376 1621.814
+CCTV1_30MINNEWS_CMN_20080328_115902 1842.454 1845.478
+CCTV1_30MINNEWS_CMN_20080328_115902 1846.885 1847.541
+CCTV2_ECON30MIN_CMN_20080406_213518 0.000 1.718
+CCTV2_ECON30MIN_CMN_20080406_213518 1.718 3.422
+CCTV2_ECON30MIN_CMN_20080406_213518 7.172 9.220
+CCTV2_ECON30MIN_CMN_20080406_213518 9.220 13.736
+CCTV2_ECON30MIN_CMN_20080406_213518 13.736 18.621
+CCTV2_ECON30MIN_CMN_20080406_213518 18.621 21.713
+CCTV2_ECON30MIN_CMN_20080406_213518 21.713 26.541
+CCTV2_ECON30MIN_CMN_20080406_213518 28.416 30.427
+CCTV2_ECON30MIN_CMN_20080406_213518 58.388 60.122
+CCTV2_ECON30MIN_CMN_20080406_213518 64.686 70.087
+CCTV2_ECON30MIN_CMN_20080406_213518 72.228 75.150
+CCTV2_ECON30MIN_CMN_20080406_213518 80.948 88.997
+CCTV2_ECON30MIN_CMN_20080406_213518 88.997 95.154
+CCTV2_ECON30MIN_CMN_20080406_213518 95.154 101.013
+CCTV2_ECON30MIN_CMN_20080406_213518 101.013 108.839
+CCTV2_ECON30MIN_CMN_20080406_213518 108.839 118.776
+CCTV2_ECON30MIN_CMN_20080406_213518 118.776 123.338
+CCTV2_ECON30MIN_CMN_20080406_213518 123.338 134.854
+CCTV2_ECON30MIN_CMN_20080406_213518 134.854 139.448
+CCTV2_ECON30MIN_CMN_20080406_213518 139.448 145.113
+CCTV2_ECON30MIN_CMN_20080406_213518 145.113 151.660
+CCTV2_ECON30MIN_CMN_20080406_213518 151.660 155.832
+CCTV2_ECON30MIN_CMN_20080406_213518 155.832 161.833
+CCTV2_ECON30MIN_CMN_20080406_213518 161.833 168.074
+CCTV2_ECON30MIN_CMN_20080406_213518 188.387 194.091
+CCTV2_ECON30MIN_CMN_20080406_213518 194.091 198.701
+CCTV2_ECON30MIN_CMN_20080406_213518 198.701 206.083
+CCTV2_ECON30MIN_CMN_20080406_213518 220.551 225.332
+CCTV2_ECON30MIN_CMN_20080406_213518 225.332 231.768
+CCTV2_ECON30MIN_CMN_20080406_213518 231.768 240.471
+CCTV2_ECON30MIN_CMN_20080406_213518 240.471 249.129
+CCTV2_ECON30MIN_CMN_20080406_213518 255.113 267.580
+CCTV2_ECON30MIN_CMN_20080406_213518 285.363 287.535
+CCTV2_ECON30MIN_CMN_20080406_213518 287.535 293.799
+CCTV2_ECON30MIN_CMN_20080406_213518 293.799 295.956
+CCTV2_ECON30MIN_CMN_20080406_213518 295.956 296.385
+CCTV2_ECON30MIN_CMN_20080406_213518 296.385 298.365
+CCTV2_ECON30MIN_CMN_20080406_213518 298.365 299.968
+CCTV2_ECON30MIN_CMN_20080406_213518 299.968 311.067
+CCTV2_ECON30MIN_CMN_20080406_213518 299.968 311.067
+CCTV2_ECON30MIN_CMN_20080406_213518 323.403 329.823
+CCTV2_ECON30MIN_CMN_20080406_213518 329.823 333.286
+CCTV2_ECON30MIN_CMN_20080406_213518 338.990 341.490
+CCTV2_ECON30MIN_CMN_20080406_213518 341.490 356.420
+CCTV2_ECON30MIN_CMN_20080406_213518 356.420 357.967
+CCTV2_ECON30MIN_CMN_20080406_213518 357.967 365.305
+CCTV2_ECON30MIN_CMN_20080406_213518 365.305 378.789
+CCTV2_ECON30MIN_CMN_20080406_213518 365.305 378.789
+CCTV2_ECON30MIN_CMN_20080406_213518 378.789 386.429
+CCTV2_ECON30MIN_CMN_20080406_213518 386.429 389.367
+CCTV2_ECON30MIN_CMN_20080406_213518 389.367 395.906
+CCTV2_ECON30MIN_CMN_20080406_213518 395.906 397.055
+CCTV2_ECON30MIN_CMN_20080406_213518 397.055 398.774
+CCTV2_ECON30MIN_CMN_20080406_213518 408.787 412.318
+CCTV2_ECON30MIN_CMN_20080406_213518 412.318 413.005
+CCTV2_ECON30MIN_CMN_20080406_213518 413.005 415.518
+CCTV2_ECON30MIN_CMN_20080406_213518 415.518 419.738
+CCTV2_ECON30MIN_CMN_20080406_213518 419.738 429.663
+CCTV2_ECON30MIN_CMN_20080406_213518 429.663 434.426
+CCTV2_ECON30MIN_CMN_20080406_213518 434.426 443.773
+CCTV2_ECON30MIN_CMN_20080406_213518 446.835 457.057
+CCTV2_ECON30MIN_CMN_20080406_213518 457.057 462.323
+CCTV2_ECON30MIN_CMN_20080406_213518 462.323 469.529
+CCTV2_ECON30MIN_CMN_20080406_213518 469.529 472.967
+CCTV2_ECON30MIN_CMN_20080406_213518 479.530 491.496
+CCTV2_ECON30MIN_CMN_20080406_213518 491.496 496.606
+CCTV2_ECON30MIN_CMN_20080406_213518 496.606 502.256
+CCTV2_ECON30MIN_CMN_20080406_213518 502.256 505.790
+CCTV2_ECON30MIN_CMN_20080406_213518 505.790 515.337
+CCTV2_ECON30MIN_CMN_20080406_213518 515.337 521.148
+CCTV2_ECON30MIN_CMN_20080406_213518 523.195 535.383
+CCTV2_ECON30MIN_CMN_20080406_213518 535.383 546.032
+CCTV2_ECON30MIN_CMN_20080406_213518 546.032 551.642
+CCTV2_ECON30MIN_CMN_20080406_213518 551.642 556.986
+CCTV2_ECON30MIN_CMN_20080406_213518 556.986 560.034
+CCTV2_ECON30MIN_CMN_20080406_213518 560.034 565.846
+CCTV2_ECON30MIN_CMN_20080406_213518 565.846 575.988
+CCTV2_ECON30MIN_CMN_20080406_213518 565.846 575.988
+CCTV2_ECON30MIN_CMN_20080406_213518 575.988 582.207
+CCTV2_ECON30MIN_CMN_20080406_213518 590.707 595.290
+CCTV2_ECON30MIN_CMN_20080406_213518 595.290 599.559
+CCTV2_ECON30MIN_CMN_20080406_213518 595.290 599.559
+CCTV2_ECON30MIN_CMN_20080406_213518 599.559 600.394
+CCTV2_ECON30MIN_CMN_20080406_213518 600.394 613.443
+CCTV2_ECON30MIN_CMN_20080406_213518 613.443 617.974
+CCTV2_ECON30MIN_CMN_20080406_213518 613.443 617.974
+CCTV2_ECON30MIN_CMN_20080406_213518 618.677 625.864
+CCTV2_ECON30MIN_CMN_20080406_213518 625.864 631.568
+CCTV2_ECON30MIN_CMN_20080406_213518 625.864 631.568
+CCTV2_ECON30MIN_CMN_20080406_213518 631.568 635.333
+CCTV2_ECON30MIN_CMN_20080406_213518 635.333 638.583
+CCTV2_ECON30MIN_CMN_20080406_213518 638.583 647.041
+CCTV2_ECON30MIN_CMN_20080406_213518 653.089 655.154
+CCTV2_ECON30MIN_CMN_20080406_213518 655.154 655.779
+CCTV2_ECON30MIN_CMN_20080406_213518 655.779 657.685
+CCTV2_ECON30MIN_CMN_20080406_213518 657.685 658.974
+CCTV2_ECON30MIN_CMN_20080406_213518 658.974 660.568
+CCTV2_ECON30MIN_CMN_20080406_213518 660.568 663.224
+CCTV2_ECON30MIN_CMN_20080406_213518 663.224 663.927
+CCTV2_ECON30MIN_CMN_20080406_213518 663.927 664.522
+CCTV2_ECON30MIN_CMN_20080406_213518 664.522 665.319
+CCTV2_ECON30MIN_CMN_20080406_213518 665.319 665.819
+CCTV2_ECON30MIN_CMN_20080406_213518 665.819 668.867
+CCTV2_ECON30MIN_CMN_20080406_213518 668.867 670.429
+CCTV2_ECON30MIN_CMN_20080406_213518 670.429 671.841
+CCTV2_ECON30MIN_CMN_20080406_213518 671.841 674.435
+CCTV2_ECON30MIN_CMN_20080406_213518 674.435 675.185
+CCTV2_ECON30MIN_CMN_20080406_213518 675.185 675.828
+CCTV2_ECON30MIN_CMN_20080406_213518 675.828 676.484
+CCTV2_ECON30MIN_CMN_20080406_213518 676.484 679.812
+CCTV2_ECON30MIN_CMN_20080406_213518 679.812 682.875
+CCTV2_ECON30MIN_CMN_20080406_213518 687.361 688.937
+CCTV2_ECON30MIN_CMN_20080406_213518 688.937 690.010
+CCTV2_ECON30MIN_CMN_20080406_213518 690.010 693.164
+CCTV2_ECON30MIN_CMN_20080406_213518 693.164 706.206
+CCTV2_ECON30MIN_CMN_20080406_213518 706.206 710.581
+CCTV2_ECON30MIN_CMN_20080406_213518 716.791 720.277
+CCTV2_ECON30MIN_CMN_20080406_213518 720.277 722.683
+CCTV2_ECON30MIN_CMN_20080406_213518 734.568 739.879
+CCTV2_ECON30MIN_CMN_20080406_213518 739.879 745.467
+CCTV2_ECON30MIN_CMN_20080406_213518 739.879 745.467
+CCTV2_ECON30MIN_CMN_20080406_213518 745.467 753.304
+CCTV2_ECON30MIN_CMN_20080406_213518 753.304 754.304
+CCTV2_ECON30MIN_CMN_20080406_213518 754.304 757.335
+CCTV2_ECON30MIN_CMN_20080406_213518 757.335 757.898
+CCTV2_ECON30MIN_CMN_20080406_213518 757.898 758.570
+CCTV2_ECON30MIN_CMN_20080406_213518 758.570 761.897
+CCTV2_ECON30MIN_CMN_20080406_213518 758.570 761.897
+CCTV2_ECON30MIN_CMN_20080406_213518 761.897 763.366
+CCTV2_ECON30MIN_CMN_20080406_213518 761.897 763.366
+CCTV2_ECON30MIN_CMN_20080406_213518 763.366 771.569
+CCTV2_ECON30MIN_CMN_20080406_213518 763.366 771.569
+CCTV2_ECON30MIN_CMN_20080406_213518 771.569 773.449
+CCTV2_ECON30MIN_CMN_20080406_213518 774.709 776.147
+CCTV2_ECON30MIN_CMN_20080406_213518 779.302 785.484
+CCTV2_ECON30MIN_CMN_20080406_213518 785.484 787.359
+CCTV2_ECON30MIN_CMN_20080406_213518 787.359 800.776
+CCTV2_ECON30MIN_CMN_20080406_213518 787.359 800.776
+CCTV2_ECON30MIN_CMN_20080406_213518 800.776 809.165
+CCTV2_ECON30MIN_CMN_20080406_213518 800.776 809.165
+CCTV2_ECON30MIN_CMN_20080406_213518 809.165 813.785
+CCTV2_ECON30MIN_CMN_20080406_213518 809.165 813.785
+CCTV2_ECON30MIN_CMN_20080406_213518 813.785 817.873
+CCTV2_ECON30MIN_CMN_20080406_213518 817.873 823.504
+CCTV2_ECON30MIN_CMN_20080406_213518 823.504 824.660
+CCTV2_ECON30MIN_CMN_20080406_213518 824.660 829.099
+CCTV2_ECON30MIN_CMN_20080406_213518 829.099 834.832
+CCTV2_ECON30MIN_CMN_20080406_213518 834.832 841.337
+CCTV2_ECON30MIN_CMN_20080406_213518 848.777 850.261
+CCTV2_ECON30MIN_CMN_20080406_213518 854.333 858.077
+CCTV2_ECON30MIN_CMN_20080406_213518 858.077 861.202
+CCTV2_ECON30MIN_CMN_20080406_213518 861.202 862.342
+CCTV2_ECON30MIN_CMN_20080406_213518 862.342 864.991
+CCTV2_ECON30MIN_CMN_20080406_213518 864.991 870.084
+CCTV2_ECON30MIN_CMN_20080406_213518 870.084 876.553
+CCTV2_ECON30MIN_CMN_20080406_213518 876.553 881.607
+CCTV2_ECON30MIN_CMN_20080406_213518 889.763 895.419
+CCTV2_ECON30MIN_CMN_20080406_213518 921.337 931.964
+CCTV2_ECON30MIN_CMN_20080406_213518 931.964 937.211
+CCTV2_ECON30MIN_CMN_20080406_213518 937.211 942.574
+CCTV2_ECON30MIN_CMN_20080406_213518 942.574 949.276
+CCTV2_ECON30MIN_CMN_20080406_213518 949.276 953.074
+CCTV2_ECON30MIN_CMN_20080406_213518 953.074 957.561
+CCTV2_ECON30MIN_CMN_20080406_213518 975.252 979.770
+CCTV2_ECON30MIN_CMN_20080406_213518 979.770 985.552
+CCTV2_ECON30MIN_CMN_20080406_213518 985.552 991.895
+CCTV2_ECON30MIN_CMN_20080406_213518 991.895 998.228
+CCTV2_ECON30MIN_CMN_20080406_213518 1008.139 1009.826
+CCTV2_ECON30MIN_CMN_20080406_213518 1009.826 1011.999
+CCTV2_ECON30MIN_CMN_20080406_213518 1011.999 1017.897
+CCTV2_ECON30MIN_CMN_20080406_213518 1017.897 1027.272
+CCTV2_ECON30MIN_CMN_20080406_213518 1027.272 1033.178
+CCTV2_ECON30MIN_CMN_20080406_213518 1047.754 1049.020
+CCTV2_ECON30MIN_CMN_20080406_213518 1053.755 1054.364
+CCTV2_ECON30MIN_CMN_20080406_213518 1062.989 1063.864
+CCTV2_ECON30MIN_CMN_20080406_213518 1065.694 1067.037
+CCTV2_ECON30MIN_CMN_20080406_213518 1069.600 1074.350
+CCTV2_ECON30MIN_CMN_20080406_213518 1091.039 1098.434
+CCTV2_ECON30MIN_CMN_20080406_213518 1106.726 1108.841
+CCTV2_ECON30MIN_CMN_20080406_213518 1108.841 1110.332
+CCTV2_ECON30MIN_CMN_20080406_213518 1120.191 1136.095
+CCTV2_ECON30MIN_CMN_20080406_213518 1136.095 1137.845
+CCTV2_ECON30MIN_CMN_20080406_213518 1137.845 1144.741
+CCTV2_ECON30MIN_CMN_20080406_213518 1137.845 1144.741
+CCTV2_ECON30MIN_CMN_20080406_213518 1144.741 1146.882
+CCTV2_ECON30MIN_CMN_20080406_213518 1146.882 1148.077
+CCTV2_ECON30MIN_CMN_20080406_213518 1148.077 1149.792
+CCTV2_ECON30MIN_CMN_20080406_213518 1150.956 1152.582
+CCTV2_ECON30MIN_CMN_20080406_213518 1152.582 1156.084
+CCTV2_ECON30MIN_CMN_20080406_213518 1152.582 1156.084
+CCTV2_ECON30MIN_CMN_20080406_213518 1156.084 1161.273
+CCTV2_ECON30MIN_CMN_20080406_213518 1161.273 1167.321
+CCTV2_ECON30MIN_CMN_20080406_213518 1161.273 1167.321
+CCTV2_ECON30MIN_CMN_20080406_213518 1167.321 1171.545
+CCTV2_ECON30MIN_CMN_20080406_213518 1171.545 1172.560
+CCTV2_ECON30MIN_CMN_20080406_213518 1172.560 1180.161
+CCTV2_ECON30MIN_CMN_20080406_213518 1187.631 1188.116
+CCTV2_ECON30MIN_CMN_20080406_213518 1188.116 1190.960
+CCTV2_ECON30MIN_CMN_20080406_213518 1190.960 1198.872
+CCTV2_ECON30MIN_CMN_20080406_213518 1198.872 1210.319
+CCTV2_ECON30MIN_CMN_20080406_213518 1210.319 1214.349
+CCTV2_ECON30MIN_CMN_20080406_213518 1214.349 1219.567
+CCTV2_ECON30MIN_CMN_20080406_213518 1219.567 1224.893
+CCTV2_ECON30MIN_CMN_20080406_213518 1224.893 1225.454
+CCTV2_ECON30MIN_CMN_20080406_213518 1225.454 1234.501
+CCTV2_ECON30MIN_CMN_20080406_213518 1234.501 1236.341
+CCTV2_ECON30MIN_CMN_20080406_213518 1234.501 1236.341
+CCTV2_ECON30MIN_CMN_20080406_213518 1236.341 1237.451
+CCTV2_ECON30MIN_CMN_20080406_213518 1237.451 1240.074
+CCTV2_ECON30MIN_CMN_20080406_213518 1240.074 1243.383
+CCTV2_ECON30MIN_CMN_20080406_213518 1243.383 1247.602
+CCTV2_ECON30MIN_CMN_20080406_213518 1247.602 1248.871
+CCTV2_ECON30MIN_CMN_20080406_213518 1248.871 1249.965
+CCTV2_ECON30MIN_CMN_20080406_213518 1249.965 1257.276
+CCTV2_ECON30MIN_CMN_20080406_213518 1257.276 1272.249
+CCTV2_ECON30MIN_CMN_20080406_213518 1257.276 1272.249
+CCTV2_ECON30MIN_CMN_20080406_213518 1272.249 1274.870
+CCTV2_ECON30MIN_CMN_20080406_213518 1272.249 1274.870
+CCTV2_ECON30MIN_CMN_20080406_213518 1274.870 1276.069
+CCTV2_ECON30MIN_CMN_20080406_213518 1276.069 1288.963
+CCTV2_ECON30MIN_CMN_20080406_213518 1276.069 1288.963
+CCTV2_ECON30MIN_CMN_20080406_213518 1288.963 1295.867
+CCTV2_ECON30MIN_CMN_20080406_213518 1288.963 1295.867
+CCTV2_ECON30MIN_CMN_20080406_213518 1295.867 1304.306
+CCTV2_ECON30MIN_CMN_20080406_213518 1314.324 1327.147
+CCTV2_ECON30MIN_CMN_20080406_213518 1327.147 1334.457
+CCTV2_ECON30MIN_CMN_20080406_213518 1334.457 1343.164
+CCTV2_ECON30MIN_CMN_20080406_213518 1343.164 1348.476
+CCTV2_ECON30MIN_CMN_20080406_213518 1348.476 1349.914
+CCTV2_ECON30MIN_CMN_20080406_213518 1349.914 1350.805
+CCTV2_ECON30MIN_CMN_20080406_213518 1350.805 1359.195
+CCTV2_ECON30MIN_CMN_20080406_213518 1359.195 1369.992
+CCTV2_ECON30MIN_CMN_20080406_213518 1369.992 1380.569
+CCTV2_ECON30MIN_CMN_20080406_213518 1380.569 1390.097
+CCTV2_ECON30MIN_CMN_20080406_213518 1390.659 1397.930
+CCTV2_ECON30MIN_CMN_20080406_213518 1397.930 1406.703
+CCTV2_ECON30MIN_CMN_20080406_213518 1406.703 1407.265
+CCTV2_ECON30MIN_CMN_20080406_213518 1407.265 1415.593
+CCTV2_ECON30MIN_CMN_20080406_213518 1415.593 1420.289
+CCTV2_ECON30MIN_CMN_20080406_213518 1420.289 1423.805
+CCTV2_ECON30MIN_CMN_20080406_213518 1423.805 1432.557
+CCTV2_ECON30MIN_CMN_20080406_213518 1432.557 1439.526
+CCTV2_ECON30MIN_CMN_20080406_213518 1439.526 1450.977
+CCTV2_ECON30MIN_CMN_20080406_213518 1439.526 1450.977
+CCTV2_ECON30MIN_CMN_20080406_213518 1450.977 1457.269
+CCTV2_ECON30MIN_CMN_20080406_213518 1457.269 1457.878
+CCTV2_ECON30MIN_CMN_20080406_213518 1457.878 1462.424
+CCTV2_ECON30MIN_CMN_20080406_213518 1462.424 1465.221
+CCTV2_ECON30MIN_CMN_20080406_213518 1465.221 1467.455
+CCTV2_ECON30MIN_CMN_20080406_213518 1465.221 1467.455
+CCTV2_ECON30MIN_CMN_20080406_213518 1467.455 1470.316
+CCTV2_ECON30MIN_CMN_20080406_213518 1470.316 1471.863
+CCTV2_ECON30MIN_CMN_20080406_213518 1471.863 1472.848
+CCTV2_ECON30MIN_CMN_20080406_213518 1472.848 1474.017
+CCTV2_ECON30MIN_CMN_20080406_213518 1474.017 1475.477
+CCTV2_ECON30MIN_CMN_20080406_213518 1475.477 1478.346
+CCTV2_ECON30MIN_CMN_20080406_213518 1478.346 1480.534
+CCTV2_ECON30MIN_CMN_20080406_213518 1480.534 1483.838
+CCTV2_ECON30MIN_CMN_20080406_213518 1480.534 1483.838
+CCTV2_ECON30MIN_CMN_20080406_213518 1483.838 1487.126
+CCTV2_ECON30MIN_CMN_20080406_213518 1487.126 1491.150
+CCTV2_ECON30MIN_CMN_20080406_213518 1491.150 1492.237
+CCTV2_ECON30MIN_CMN_20080406_213518 1492.237 1494.518
+CCTV2_ECON30MIN_CMN_20080406_213518 1492.237 1494.518
+CCTV2_ECON30MIN_CMN_20080406_213518 1494.518 1497.971
+CCTV2_ECON30MIN_CMN_20080406_213518 1497.971 1503.034
+CCTV2_ECON30MIN_CMN_20080406_213518 1503.034 1506.409
+CCTV2_ECON30MIN_CMN_20080406_213518 1506.409 1508.689
+CCTV2_ECON30MIN_CMN_20080406_213518 1508.689 1516.722
+CCTV2_ECON30MIN_CMN_20080406_213518 1516.722 1522.285
+CCTV2_ECON30MIN_CMN_20080406_213518 1530.286 1541.613
+CCTV2_ECON30MIN_CMN_20080406_213518 1541.613 1548.549
+CCTV2_ECON30MIN_CMN_20080406_213518 1554.427 1558.110
+CCTV2_ECON30MIN_CMN_20080406_213518 1558.110 1578.287
+CCTV2_ECON30MIN_CMN_20080406_213518 1578.287 1588.944
+CCTV2_ECON30MIN_CMN_20080406_213518 1588.944 1595.663
+CCTV2_ECON30MIN_CMN_20080406_213518 1601.929 1609.599
+CCTV2_ECON30MIN_CMN_20080406_213518 1609.599 1616.022
+CCTV2_ECON30MIN_CMN_20080406_213518 1616.022 1619.678
+CCTV2_ECON30MIN_CMN_20080406_213518 1619.678 1633.052
+CCTV2_ECON30MIN_CMN_20080406_213518 1645.770 1652.644
+CCTV2_ECON30MIN_CMN_20080406_213518 1652.644 1659.737
+CCTV2_ECON30MIN_CMN_20080406_213518 1659.737 1660.381
+CCTV2_ECON30MIN_CMN_20080406_213518 1660.381 1664.506
+CCTV2_ECON30MIN_CMN_20080406_213518 1664.506 1666.521
+CCTV2_ECON30MIN_CMN_20080406_213518 1666.521 1671.523
+CCTV2_ECON30MIN_CMN_20080406_213518 1666.521 1671.523
+CCTV2_ECON30MIN_CMN_20080406_213518 1671.523 1671.914
+CCTV2_ECON30MIN_CMN_20080406_213518 1671.914 1674.270
+CCTV2_ECON30MIN_CMN_20080406_213518 1674.270 1675.489
+CCTV2_ECON30MIN_CMN_20080406_213518 1675.489 1677.853
+CCTV2_ECON30MIN_CMN_20080406_213518 1677.853 1679.915
+CCTV2_ECON30MIN_CMN_20080406_213518 1679.915 1682.118
+CCTV2_ECON30MIN_CMN_20080406_213518 1682.118 1684.204
+CCTV2_ECON30MIN_CMN_20080406_213518 1684.204 1689.958
+CCTV2_ECON30MIN_CMN_20080406_213518 1689.958 1697.192
+CCTV2_ECON30MIN_CMN_20080406_213518 1697.192 1700.601
+CCTV2_ECON30MIN_CMN_20080406_213518 1700.601 1702.054
+CCTV2_ECON30MIN_CMN_20080406_213518 1702.054 1704.428
+CCTV2_ECON30MIN_CMN_20080406_213518 1704.428 1705.630
+CCTV2_ECON30MIN_CMN_20080406_213518 1705.630 1707.255
+CCTV2_ECON30MIN_CMN_20080406_213518 1707.255 1709.552
+CCTV2_ECON30MIN_CMN_20080406_213518 1709.552 1713.215
+CCTV2_ECON30MIN_CMN_20080406_213518 1713.215 1714.262
+CCTV2_ECON30MIN_CMN_20080406_213518 1714.262 1715.965
+CCTV2_ECON30MIN_CMN_20080406_213518 1715.965 1717.345
+CCTV2_ECON30MIN_CMN_20080406_213518 1717.345 1718.954
+CCTV2_ECON30MIN_CMN_20080406_213518 1718.954 1719.673
+CCTV2_ECON30MIN_CMN_20080406_213518 1719.673 1721.314
+CCTV2_ECON30MIN_CMN_20080406_213518 1721.314 1721.892
+CCTV2_ECON30MIN_CMN_20080406_213518 1721.892 1726.377
+CCTV2_ECON30MIN_CMN_20080406_213518 1726.377 1730.487
+CCTV2_ECON30MIN_CMN_20080406_213518 1730.487 1735.972
+CCTV2_ECON30MIN_CMN_20080406_213518 1735.972 1737.754
+CCTV2_ECON30MIN_CMN_20080406_213518 1737.754 1740.692
+CCTV2_ECON30MIN_CMN_20080406_213518 1740.692 1741.880
+CCTV2_ECON30MIN_CMN_20080406_213518 1741.880 1746.364
+CCTV2_ECON30MIN_CMN_20080406_213518 1747.457 1749.332
+CCTV2_ECON30MIN_CMN_20080406_213518 1751.207 1752.570
+CCTV2_ECON30MIN_CMN_20080406_213518 1752.570 1753.305
+CCTV2_ECON30MIN_CMN_20080406_213518 1753.305 1754.116
+CCTV2_ECON30MIN_CMN_20080406_213518 1754.116 1757.837
+CCTV2_ECON30MIN_CMN_20080406_213518 1757.837 1759.275
+CCTV2_ECON30MIN_CMN_20080406_213518 1759.275 1760.275
+CCTV2_ECON30MIN_CMN_20080406_213518 1760.275 1763.806
+CCTV2_ECON30MIN_CMN_20080406_213518 1763.806 1766.695
+CCTV2_ECON30MIN_CMN_20080406_213518 1766.695 1770.343
+CCTV2_ECON30MIN_CMN_20080406_213518 1770.343 1774.680
+CCTV2_ECON30MIN_CMN_20080406_213518 1774.680 1782.383
+CCTV2_ECON30MIN_CMN_20080406_213518 1788.978 1796.969
+CCTV2_ECON30MIN_CMN_20080406_213518 1796.969 1798.631
+CCTV2_ECON30MIN_CMN_20080406_213518 1798.631 1804.966
+CCTV2_ECON30MIN_CMN_20080406_213518 1804.966 1808.600
+CCTV2_ECON30MIN_CMN_20080406_213518 1808.600 1811.156
+CCTV2_ECON30MIN_CMN_20080406_213518 1811.156 1816.373
+CCTV2_ECON30MIN_CMN_20080406_213518 1932.405 1938.811
+CCTV2_ECON30MIN_CMN_20080406_213518 1938.811 1945.761
+CCTV2_ECON30MIN_CMN_20080406_213518 1952.047 1955.407
+CCTV2_ECON30MIN_CMN_20080406_213518 1955.407 1962.562
+CCTV2_NEWSLIST_CMN_20080407_114902 190.964 206.081
+CCTV2_NEWSLIST_CMN_20080407_114902 206.081 231.442
+CCTV2_NEWSLIST_CMN_20080407_114902 293.868 302.892
+CCTV2_NEWSLIST_CMN_20080407_114902 377.067 402.330
+CCTV2_NEWSLIST_CMN_20080407_114902 402.330 428.120
+CCTV2_NEWSLIST_CMN_20080407_114902 444.461 465.699
+CCTV2_NEWSLIST_CMN_20080407_114902 501.102 518.374
+CCTV2_NEWSLIST_CMN_20080407_114902 518.374 533.099
+CCTV2_NEWSLIST_CMN_20080407_114902 603.572 619.857
+CCTV2_NEWSLIST_CMN_20080407_114902 626.844 634.449
+CCTV2_NEWSLIST_CMN_20080407_114902 634.449 639.293
+CCTV2_NEWSLIST_CMN_20080407_114902 639.293 652.670
+CCTV2_NEWSLIST_CMN_20080407_114902 666.215 684.965
+CCTV2_NEWSLIST_CMN_20080407_114902 684.965 704.091
+CCTV2_NEWSLIST_CMN_20080407_114902 704.091 718.755
+CCTV2_NEWSLIST_CMN_20080407_114902 718.755 734.741
+CCTV2_NEWSLIST_CMN_20080407_114902 734.741 744.705
+CCTV2_NEWSLIST_CMN_20080407_114902 744.705 769.831
+CCTV2_NEWSLIST_CMN_20080407_114902 769.831 783.963
+CCTV2_NEWSLIST_CMN_20080407_114902 808.238 814.887
+CCTV2_NEWSLIST_CMN_20080407_114902 824.133 835.744
+CCTV2_NEWSLIST_CMN_20080407_114902 835.744 840.588
+CCTV2_NEWSLIST_CMN_20080407_114902 849.630 867.651
+CCTV2_NEWSLIST_CMN_20080407_114902 867.651 877.591
+CCTV2_NEWSLIST_CMN_20080407_114902 914.744 929.296
+CCTV2_NEWSLIST_CMN_20080407_114902 929.296 940.488
+CCTV2_NEWSLIST_CMN_20080407_114902 955.102 961.114
+CCTV2_NEWSLIST_CMN_20080407_114902 961.114 977.306
+CCTV2_NEWSLIST_CMN_20080407_114902 977.306 993.617
+CCTV2_NEWSLIST_CMN_20080407_114902 993.617 1005.231
+CCTV2_NEWSLIST_CMN_20080407_114902 1005.231 1025.242
+CCTV2_NEWSLIST_CMN_20080407_114902 1025.242 1042.108
+CCTV2_NEWSLIST_CMN_20080407_114902 1042.108 1050.797
+CCTV2_NEWSLIST_CMN_20080407_114902 1050.797 1065.704
+CCTV2_NEWSLIST_CMN_20080407_114902 1065.704 1080.824
+CCTV2_NEWSLIST_CMN_20080407_114902 1080.824 1094.970
+CCTV2_NEWSLIST_CMN_20080407_114902 1101.267 1106.354
+CCTV2_NEWSLIST_CMN_20080407_114902 1138.680 1147.303
+CCTV2_NEWSLIST_CMN_20080407_114902 1169.665 1172.749
+CCTV2_NEWSLIST_CMN_20080407_114902 1199.363 1220.498
+CCTV2_NEWSLIST_CMN_20080407_114902 1220.498 1233.037
+CCTV2_NEWSLIST_CMN_20080407_114902 1233.037 1245.396
+CCTV2_NEWSLIST_CMN_20080407_114902 1259.169 1263.025
+CCTV2_NEWSLIST_CMN_20080407_114902 1315.415 1324.866
+CCTV2_NEWSLIST_CMN_20080407_114902 1324.866 1343.299
+CCTV2_NEWSLIST_CMN_20080407_114902 1356.988 1371.982
+CCTV2_NEWSLIST_CMN_20080407_114902 1402.039 1415.326
+CCTV2_NEWSLIST_CMN_20080407_114902 1415.326 1418.171
+CCTV2_NEWSLIST_CMN_20080407_114902 1418.171 1442.830
+CCTV2_NEWSLIST_CMN_20080407_114902 1483.270 1496.025
+CCTV2_NEWSLIST_CMN_20080407_114902 1504.702 1522.347
+CCTV2_NEWSLIST_CMN_20080407_114902 1522.347 1538.256
+CCTV2_NEWSLIST_CMN_20080407_114902 1570.672 1585.552
+CCTV2_NEWSLIST_CMN_20080407_114902 1629.706 1653.096
+CCTV2_NEWSLIST_CMN_20080407_114902 1653.096 1672.960
+CCTV2_NEWSLIST_CMN_20080407_114902 1672.960 1685.742
+CCTV2_NEWSLIST_CMN_20080407_114902 1711.517 1729.122
+CCTV2_NEWSLIST_CMN_20080407_114902 1729.122 1750.050
+CCTV2_NEWSLIST_CMN_20080407_114902 1790.849 1817.194
+CCTV2_NEWSLIST_CMN_20080407_114902 1850.350 1864.369
+CCTV2_NEWSLIST_CMN_20080407_114902 1902.717 1923.130
+CCTV2_NEWSLIST_CMN_20080407_114902 1979.999 1991.700
+CCTV2_NEWSLIST_CMN_20080407_114902 2008.520 2021.212
+CCTV2_NEWSLIST_CMN_20080407_114902 2057.251 2062.904
+CCTV2_NEWSLIST_CMN_20080407_114902 2090.204 2105.557
+CCTV2_NEWSLIST_CMN_20080407_114902 2105.557 2114.862
+CCTV2_NEWSLIST_CMN_20080407_114902 2114.862 2131.807
+CCTV2_NEWSLIST_CMN_20080407_114902 2131.807 2143.035
+CCTV2_NEWSLIST_CMN_20080407_114902 2168.478 2178.148
+CCTV2_NEWSLIST_CMN_20080407_114902 2267.472 2279.981
+CCTV2_NEWSLIST_CMN_20080407_114902 2310.269 2322.337
+CCTV2_NEWSLIST_CMN_20080407_114902 2337.035 2354.949
+CCTV2_NEWSLIST_CMN_20080407_114902 2354.949 2366.517
+CCTV2_NEWSLIST_CMN_20080416_114902 136.569 157.486
+CCTV2_NEWSLIST_CMN_20080416_114902 157.486 171.941
+CCTV2_NEWSLIST_CMN_20080416_114902 203.415 211.459
+CCTV2_NEWSLIST_CMN_20080416_114902 236.208 252.317
+CCTV2_NEWSLIST_CMN_20080416_114902 285.120 302.659
+CCTV2_NEWSLIST_CMN_20080416_114902 346.830 356.405
+CCTV2_NEWSLIST_CMN_20080416_114902 356.405 374.393
+CCTV2_NEWSLIST_CMN_20080416_114902 395.198 407.444
+CCTV2_NEWSLIST_CMN_20080416_114902 424.983 442.582
+CCTV2_NEWSLIST_CMN_20080416_114902 442.582 453.295
+CCTV2_NEWSLIST_CMN_20080416_114902 561.461 575.647
+CCTV2_NEWSLIST_CMN_20080416_114902 575.647 587.916
+CCTV2_NEWSLIST_CMN_20080416_114902 628.343 639.624
+CCTV2_NEWSLIST_CMN_20080416_114902 704.371 719.994
+CCTV2_NEWSLIST_CMN_20080416_114902 884.158 899.662
+CCTV2_NEWSLIST_CMN_20080416_114902 924.868 938.455
+CCTV2_NEWSLIST_CMN_20080416_114902 938.455 956.416
+CCTV2_NEWSLIST_CMN_20080416_114902 970.836 976.225
+CCTV2_NEWSLIST_CMN_20080416_114902 976.225 980.905
+CCTV2_NEWSLIST_CMN_20080416_114902 1010.567 1024.643
+CCTV2_NEWSLIST_CMN_20080416_114902 1024.643 1042.937
+CCTV2_NEWSLIST_CMN_20080416_114902 1057.868 1075.197
+CCTV2_NEWSLIST_CMN_20080416_114902 1075.197 1090.614
+CCTV2_NEWSLIST_CMN_20080416_114902 1151.810 1164.080
+CCTV2_NEWSLIST_CMN_20080416_114902 1164.080 1167.218
+CCTV2_NEWSLIST_CMN_20080416_114902 1167.218 1188.853
+CCTV2_NEWSLIST_CMN_20080416_114902 1218.219 1231.686
+CCTV2_NEWSLIST_CMN_20080416_114902 1231.686 1252.859
+CCTV2_NEWSLIST_CMN_20080416_114902 1252.859 1256.506
+CCTV2_NEWSLIST_CMN_20080416_114902 1356.073 1368.073
+CCTV2_NEWSLIST_CMN_20080416_114902 1375.798 1394.564
+CCTV2_NEWSLIST_CMN_20080416_114902 1394.564 1413.738
+CCTV2_NEWSLIST_CMN_20080416_114902 1413.738 1425.539
+CCTV2_NEWSLIST_CMN_20080416_114902 1425.539 1428.713
+CCTV2_NEWSLIST_CMN_20080416_114902 1428.713 1447.270
+CCTV2_NEWSLIST_CMN_20080416_114902 1447.270 1464.119
+CCTV2_NEWSLIST_CMN_20080416_114902 1464.119 1492.141
+CCTV2_NEWSLIST_CMN_20080416_114902 1492.141 1503.366
+CCTV2_NEWSLIST_CMN_20080416_114902 1523.247 1545.067
+CCTV2_NEWSLIST_CMN_20080416_114902 1545.067 1557.549
+CCTV2_NEWSLIST_CMN_20080416_114902 1557.549 1578.087
+CCTV2_NEWSLIST_CMN_20080416_114902 1578.087 1606.821
+CCTV2_NEWSLIST_CMN_20080416_114902 1606.821 1628.438
+CCTV2_NEWSLIST_CMN_20080416_114902 1628.438 1639.360
+CCTV2_NEWSLIST_CMN_20080416_114902 1639.360 1642.773
+CCTV2_NEWSLIST_CMN_20080416_114902 1642.773 1661.213
+CCTV2_NEWSLIST_CMN_20080416_114902 1683.387 1697.079
+CCTV2_NEWSLIST_CMN_20080416_114902 1697.079 1714.468
+CCTV2_NEWSLIST_CMN_20080416_114902 1804.873 1816.843
+CCTV2_NEWSLIST_CMN_20080416_114902 1816.843 1827.322
+CCTV2_NEWSLIST_CMN_20080416_114902 1827.322 1842.289
+CCTV2_NEWSLIST_CMN_20080416_114902 1862.795 1876.957
+CCTV2_NEWSLIST_CMN_20080416_114902 1876.957 1885.155
+CCTV2_NEWSLIST_CMN_20080416_114902 1900.104 1912.100
+CCTV2_NEWSLIST_CMN_20080416_114902 1930.933 1937.460
+CCTV2_NEWSLIST_CMN_20080416_114902 1937.460 1948.838
+CCTV2_NEWSLIST_CMN_20080416_114902 1948.838 1964.288
+CCTV2_NEWSLIST_CMN_20080416_114902 1989.252 1999.437
+CCTV2_NEWSLIST_CMN_20080416_114902 1999.437 2012.964
+CCTV2_NEWSLIST_CMN_20080416_114902 2044.868 2060.413
+CCTV2_NEWSLIST_CMN_20080416_114902 2090.254 2100.015
+CCTV2_NEWSLIST_CMN_20080416_114902 2100.015 2116.180
+CCTV2_NEWSLIST_CMN_20080416_114902 2141.813 2153.639
+CCTV2_NEWSLIST_CMN_20080416_114902 2153.639 2166.783
+CCTV2_NEWSLIST_CMN_20080416_114902 2215.999 2229.077
+CCTV2_NEWSLIST_CMN_20080416_114902 2273.986 2283.360
diff --git a/egs/mandarin_bn_bc/s5/local/gigaword_prep_txt.sh b/egs/mandarin_bn_bc/s5/local/gigaword_prep_txt.sh
new file mode 100644
index 00000000000..5359325bd9d
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gigaword_prep_txt.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+if [ $# != 2 ]; then
+ echo "Usage: $0 <gigaword-dir> <gigaword-lang-dir>"
+ exit 1;
+fi
+
+giga_dir=$1
+giga_lang_dir=$2
+
+[ ! -d $giga_lang_dir ] && mkdir -p $giga_lang_dir;
+
+find $giga_dir -name "*.gz" > $giga_lang_dir/giga_trans.flist || { echo "Failed to find files"; exit 1; }
+
+if [ `wc -l $giga_lang_dir/giga_trans.flist | cut -d " " -f1` == 0 ]; then
+ echo "Empty file list : $giga_lang_dir/giga_trans.flist"
+ exit 1;
+fi
+
+for f in `cat $giga_lang_dir/giga_trans.flist`
+do
+ fname=$(basename "$f" ".gz")
+ gunzip -c $f | \
+ python3 local/gigaword_text_parse.py > $giga_lang_dir/$fname.tmp.txt
+done
+
+cat $giga_lang_dir/*.tmp.txt > $giga_lang_dir/raw.text
+rm $giga_lang_dir/*.tmp.txt
+
+pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'`
+export PYTHONPATH=$PYTHONPATH:`pwd`/tools/mmseg-1.3.0/lib/python${pyver}/site-packages
+if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "--- Downloading mmseg-1.3.0 ..."
+ echo "NOTE: it assumes that you have Python, Setuptools installed on your system!"
+ wget -P tools http://pypi.python.org/packages/source/m/mmseg/mmseg-1.3.0.tar.gz
+ tar xf tools/mmseg-1.3.0.tar.gz -C tools
+ cd tools/mmseg-1.3.0
+ mkdir -p lib/python${pyver}/site-packages
+ CC=gcc CXX=g++ python setup.py build
+ python setup.py install --prefix=.
+ cd ../..
+ if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "mmseg is not found - installation failed?"
+ exit 1
+ fi
+fi
+cat $giga_lang_dir/raw.text |\
+ perl local/mandarin_text_normalize.pl |\
+ python local/mandarin_segment.py > $giga_lang_dir/filtered.text
+cat $giga_lang_dir/filtered.text |\
+ python local/mandarin_segment.py > $giga_lang_dir/segmented.text
+mv $giga_lang_dir/segmented.text $giga_lang_dir/text
diff --git a/egs/mandarin_bn_bc/s5/local/gigaword_text_parse.py b/egs/mandarin_bn_bc/s5/local/gigaword_text_parse.py
new file mode 100644
index 00000000000..e2d7ca4bb6b
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gigaword_text_parse.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+
+# Apache 2.0
+
+from __future__ import print_function
+import io
+import sys
+
+if __name__ == '__main__':
+ input_stream = io.TextIOWrapper(sys.stdin.buffer,encoding='utf-8')
+ anker = False
+ for line in input_stream.readlines():
+ line = line.strip()
+ if line == "<P>":
+ anker = True
+ continue
+ elif line == "</P>":
+ anker = False
+ elif anker:
+ print(line)
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_format_lms.sh b/egs/mandarin_bn_bc/s5/local/mandarin_format_lms.sh
new file mode 100755
index 00000000000..728d2180b70
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_format_lms.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+if [ -f path.sh ]; then
+ . ./path.sh; else
+ echo "missing path.sh"; exit 1;
+fi
+
+if [ $# -ne 3 ]; then
+ echo "Usage: $0 <arpa-lm> <src-lang-dir> <tgt-lang-dir>"
+ echo "E.g., $0 data/local/lm/srilm.o4g.kn.gz data/lang data/lang_test"
+ exit 1
+fi
+
+arpa_lm=$1
+src_dir=$2
+tgt_dir=$3
+
+
+set -e -o pipefail
+set -x
+
+export LC_ALL=C
+
+#arpa_lm=data/local/gale/train/lm_4gram/srilm.o4g.kn.gz
+
+[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
+
+rm -r $tgt_dir || true
+cp -r $src_dir $tgt_dir
+
+gunzip -c "$arpa_lm" | \
+ arpa2fst --disambig-symbol=#0 \
+ --read-symbol-table=$tgt_dir/words.txt - $tgt_dir/G.fst
+
+
+echo "Checking how stochastic G is (the first of these numbers should be small):"
+fstisstochastic $tgt_dir/G.fst || true
+
+## Check lexicon.
+## just have a look and make sure it seems sane.
+echo "First few lines of lexicon FST:"
+(
+ fstprint --isymbols=$src_dir/phones.txt --osymbols=$src_dir/words.txt $src_dir/L.fst | head
+) || true
+echo Performing further checks
+
+# Checking that G.fst is determinizable.
+fstdeterminize $tgt_dir/G.fst /dev/null || {
+ echo Error determinizing G.
+ exit 1
+}
+
+# Checking that L_disambig.fst is determinizable.
+fstdeterminize $tgt_dir/L_disambig.fst /dev/null || echo Error determinizing L.
+
+# Checking that disambiguated lexicon times G is determinizable
+# Note: we do this with fstdeterminizestar not fstdeterminize, as
+# fstdeterminize was taking forever (presumably relates to a bug
+# in this version of OpenFst that makes determinization slow for
+# some case).
+fsttablecompose $tgt_dir/L_disambig.fst $tgt_dir/G.fst | \
+ fstdeterminizestar >/dev/null || echo Error
+
+# Checking that LG is stochastic:
+fsttablecompose $tgt_dir/L_disambig.fst $tgt_dir/G.fst | \
+ fstisstochastic || echo LG is not stochastic
+
+echo "LM preparation succeeded."
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_merge_dict.sh b/egs/mandarin_bn_bc/s5/local/mandarin_merge_dict.sh
new file mode 100755
index 00000000000..18ddc6d03a8
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_merge_dict.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+# This script merges the gale-tdt lexicon directory with gigaword (simplified Mandarin)
+# lexicon directory. It requires the lexiconp.txt file in both directories
+# since the probabilities in lexiconp.txt may be re-estimated.
+
+if [ $# -ne 3 ];then
+ echo "Usage: $0 <lex-dir-1> <lex-dir-2> <tgt-lex-dir>"
+ echo "E.g., $0 data/local/dict_gale_tdt data/local/dict_giga data/local/dict_merged"
+ exit 1
+fi
+
+lex_dir_1=$1
+lex_dir_2=$2
+tgt_lex_dir=$3
+
+mkdir -p $tgt_lex_dir
+
+for f in silence_phones.txt nonsilence_phones.txt lexiconp.txt extra_questions.txt;do
+ [ ! -f $lex_dir_1/$f ] && echo "$0: no such file $lex_dir_1/$f" && exit 1;
+ [ ! -f $lex_dir_2/$f ] && echo "$0: no such file $lex_dir_2/$f" && exit 1;
+ # We copy the phone related files from gale dictionary directory, since they
+ # are the same phone sets as GIGA words.
+ cp $lex_dir_1/$f $tgt_lex_dir
+done
+
+mv $tgt_lex_dir/lexiconp.txt $tgt_lex_dir/lexiconp_1.txt
+
+
+awk 'NR==FNR{a[$1];next}{if (!($1 in a)) print $0}' $tgt_lex_dir/lexiconp_1.txt \
+ $lex_dir_2/lexiconp.txt > $tgt_lex_dir/lexiconp_2.txt
+cat $tgt_lex_dir/lexiconp_1.txt $tgt_lex_dir/lexiconp_2.txt | sort > $tgt_lex_dir/lexiconp.txt
+
+
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_mix_lm.sh b/egs/mandarin_bn_bc/s5/local/mandarin_mix_lm.sh
new file mode 100755
index 00000000000..a5bcf1dc8e0
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_mix_lm.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+# This script interpolates two language models.
+
+ngram_order=4
+oov_sym="<UNK>"
+prune_thres=1e-9
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: $0 [--ngram-order] [--oov-sym] [--prune-thres] <lm-dir-1> <lm-dir-2> <lm-mix-dir> <dev-dir>"
+ echo "E.g. $0 --ngram-order 4 --oov-sym \"<UNK>\" --prune-thres \"1e-9\" \
+ data/local/lm_gale data/local/lm_giga data/local/lm_mix data/dev "
+ exit 1;
+fi
+lm_dir_1=$1
+lm_dir_2=$2
+lm_mix_dir=$3
+heldout=$4/text
+
+mkdir -p $lm_mix_dir || exit 1;
+if [ ! -f $lm_mix_dir/srilm.o${ngram_order}g.kn.gz ]; then
+ for d in $lm_dir_1 $lm_dir_2; do
+ ngram -debug 2 -order $ngram_order -unk -lm $d/srilm.o${ngram_order}g.kn.gz \
+ -ppl $heldout > $d/ppl ;
+ done
+ compute-best-mix $lm_dir_1/ppl $lm_dir_2/ppl > $lm_mix_dir/best-mix.ppl
+ lambdas=$(grep -o '(.*)' $lm_mix_dir/best-mix.ppl | head -1)
+ lambdas=${lambdas%%)}
+ lambdas=${lambdas##(}
+ lambda1=`echo $lambdas | cut -d " " -f1`
+ lambda2=`echo $lambdas | cut -d " " -f2`
+ ngram_opts="$lm_dir_1/srilm.o${ngram_order}g.kn.gz -weight $lambda1 -order \
+ $ngram_order\n$lm_dir_2/srilm.o${ngram_order}g.kn.gz -weight $lambda2 -order $ngram_order"
+ echo -e ${ngram_opts} > $lm_mix_dir/ngram_opts
+ ngram -order $ngram_order \
+ -unk -map-unk $oov_sym \
+ -prune $prune_thres \
+ -read-mix-lms -lm $lm_mix_dir/ngram_opts \
+ -write-lm $lm_mix_dir/srilm.o${ngram_order}g.kn.gz
+ ngram -debug 2 -order $ngram_order -unk \
+ -lm $lm_mix_dir/srilm.o${ngram_order}g.kn.gz \
+ -ppl $heldout > $lm_mix_dir/lm.ppl
+fi
+echo "LM interpolation done"
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_prepare_dict.sh b/egs/mandarin_bn_bc/s5/local/mandarin_prepare_dict.sh
new file mode 100755
index 00000000000..73301f709b0
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_prepare_dict.sh
@@ -0,0 +1,352 @@
+#!/usr/bin/env bash
+
+# prepare dictionary for Mandarin-ASR
+# it is done for English and Chinese separately,
+# For English, we use CMU dictionary, and Sequitur G2P
+# for OOVs, while all English phone set will convert to Chinese
+# phone set at the end. For Chinese, we use an online dictionary,
+# for OOV, we just produce pronunciation using Character Mapping.
+
+extra_text=
+
+. path.sh
+. utils/parse_options.sh || exit 1;
+
+set -e -o pipefail
+if [ $# != 2 ]; then
+ echo "Usage: local/prepare_dict.sh [--extra-text /path/to/extra-text] <dict-dir> <src-dir>"
+ echo "E.g., $0: --extra-text data/local/gigaword/filtered_text data/local/dict data/local/train"
+ exit 1;
+fi
+
+dict_dir=$1
+src_dir=$2
+
+mkdir -p $dict_dir/lexicon-{en,ch}
+
+# extract full vocabulary from train text
+cat $src_dir/text |\
+ awk '{for (i = 2; i <= NF; i++) print $i}' |\
+ perl -ape 's/ /\n/g;' | sort -u | \
+ grep -v '\[LAUGHTER\]' | \
+ grep -v '\[NOISE\]' |\
+ grep -v '\[VOCALIZEDNOISE\]' |\
+ grep -v '\[VOCALIZED-NOISE\]' \
+ > $dict_dir/words_train.txt
+
+# extract vocabulary from extra text
+if [ ! -z $extra_text ];then
+ echo "Using extra text for LM training, add these words for lexicon: $extra_text"
+ cp $extra_text $dict_dir/lm_extra_text_filtered || exit 1;
+ awk '{for (i=1; i <= NF; i++) print $i}' $dict_dir/lm_extra_text_filtered | sort -u |\
+ grep -v '\[LAUGHTER\]' |\
+ grep -v '\[NOISE\]' |\
+ grep -v '\[VOCALIZEDNOISE\]' |\
+ grep -v '\[VOCALIZED-NOISE\]' |\
+ sed -e 's/((\([^)]\{0,\}\)))/\1/g;' |\
+ perl -ape 's/ /\n/g;' | awk 'NF>0' > $dict_dir/extra_words.txt
+ cat $dict_dir/words_train.txt $dict_dir/extra_words.txt | sort -u > $dict_dir/words.txt
+else
+ cp $dict_dir/words_train.txt $dict_dir/words.txt
+fi
+
+# split into English and Chinese
+cat $dict_dir/words.txt | grep '[a-zA-Z]' > $dict_dir/lexicon-en/words-en.txt || exit 1;
+cat $dict_dir/words.txt | grep -v '[a-zA-Z]' > $dict_dir/lexicon-ch/words-ch.txt || exit 1;
+
+
+##### produce pronunciations for english
+if [ ! -f $dict_dir/cmudict/cmudict.0.7a ]; then
+ echo "--- Downloading CMU dictionary ..."
+ svn co -r 13068 https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict \
+ $dict_dir/cmudict || exit 1;
+fi
+
+# format cmudict
+echo "--- Stripping stress and pronunciation variant markers from cmudict ..."
+perl $dict_dir/cmudict/scripts/make_baseform.pl \
+ $dict_dir/cmudict/cmudict.0.7a /dev/stdout |\
+ sed -e 's:^\([^\s(]\+\)([0-9]\+)\(\s\+\)\(.*\):\1\2\3:' > $dict_dir/cmudict/cmudict-plain.txt || exit 1;
+
+# extract in-vocab lexicon and oov words
+echo "--- Searching for English OOV words ..."
+awk 'NR==FNR{words[$1]; next;} !($1 in words)' \
+ $dict_dir/cmudict/cmudict-plain.txt $dict_dir/lexicon-en/words-en.txt |\
+ egrep -v '<.?s>' > $dict_dir/lexicon-en/words-en-oov.txt || exit 1;
+
+awk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+ $dict_dir/lexicon-en/words-en.txt $dict_dir/cmudict/cmudict-plain.txt |\
+ egrep -v '<.?s>' > $dict_dir/lexicon-en/lexicon-en-iv.txt || exit 1;
+
+wc -l $dict_dir/lexicon-en/words-en-oov.txt
+wc -l $dict_dir/lexicon-en/lexicon-en-iv.txt
+
+# setup g2p and generate oov lexicon
+if [ ! -f conf/g2p_model ]; then
+ echo "--- Downloading a pre-trained Sequitur G2P model ..."
+ wget http://sourceforge.net/projects/kaldi/files/sequitur-model4 -O conf/g2p_model
+ if [ ! -f conf/g2p_model ]; then
+ echo "Failed to download the g2p model!"
+ exit 1
+ fi
+fi
+
+echo "--- Preparing pronunciations for OOV words ..."
+g2p=`which g2p.py`
+if [ ! -x $g2p ]; then
+ echo "g2p.py is not found. Checkout tools/extras/install_sequitur.sh."
+ exit 1
+fi
+g2p.py --model=conf/g2p_model --apply $dict_dir/lexicon-en/words-en-oov.txt \
+ > $dict_dir/lexicon-en/lexicon-en-oov.txt || exit 1;
+
+# merge in-vocab and oov lexicon
+cat $dict_dir/lexicon-en/lexicon-en-oov.txt $dict_dir/lexicon-en/lexicon-en-iv.txt |\
+ sort > $dict_dir/lexicon-en/lexicon-en-phn.txt || exit 1;
+
+# convert cmu phoneme to pinyin phonenme
+mkdir -p $dict_dir/map
+cat conf/cmu2pinyin | awk '{print $1;}' | sort -u > $dict_dir/map/cmu || exit 1;
+cat conf/pinyin2cmu | awk -v cmu=$dict_dir/map/cmu \
+ 'BEGIN{while((getline<cmu) > 0) {used[$1]=1;}} {for (i=2; i<=NF; i++) if ($i in used) print $i;}' |\
+ sort -u > $dict_dir/map/cmu-used || exit 1;
+cat $dict_dir/map/cmu | awk -v cmu=$dict_dir/map/cmu-used \
+ 'BEGIN{while((getline<cmu) > 0) {used[$1]=1;}} {if (!($1 in used)) print $1;}' \
+ > $dict_dir/map/cmu-not-used || exit 1;
+
+awk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+ $dict_dir/map/cmu-not-used conf/cmu2pinyin |\
+ egrep -v '<.?s>' > $dict_dir/map/cmu-py || exit 1;
+
+cat $dict_dir/map/cmu-py | \
+ perl -e '
+ open(MAPS, $ARGV[0]) or die("could not open map file");
+ my %py2ph;
+ foreach $line (<MAPS>) {
+ @A = split(" ", $line);
+ $py = shift(@A);
+ $py2ph{$py} = [@A];
+ }
+ my @entry;
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ @entry = ();
+ $W = shift(@A);
+ push(@entry, $W);
+ for($i = 0; $i < @A; $i++) { push(@entry, @{$py2ph{$A[$i]}}); }
+ print "@entry";
+ print "\n";
+ }
+' conf/pinyin2cmu > $dict_dir/map/cmu-cmu || exit 1;
+
+cat $dict_dir/lexicon-en/lexicon-en-phn.txt | \
+ perl -e '
+ open(MAPS, $ARGV[0]) or die("could not open map file");
+ my %py2ph;
+ foreach $line (<MAPS>) {
+ @A = split(" ", $line);
+ $py = shift(@A);
+ $py2ph{$py} = [@A];
+ }
+ my @entry;
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ @entry = ();
+ $W = shift(@A);
+ push(@entry, $W);
+ for($i = 0; $i < @A; $i++) {
+ if (exists $py2ph{$A[$i]}) { push(@entry, @{$py2ph{$A[$i]}}); }
+ else {push(@entry, $A[$i])};
+ }
+ print "@entry";
+ print "\n";
+ }
+' $dict_dir/map/cmu-cmu > $dict_dir/lexicon-en/lexicon-en.txt || exit 1;
+
+
+##### produce pronunciations for chinese
+if [ ! -f $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt ]; then
+ echo "------------- Downloading cedit dictionary ---------------"
+ mkdir -p $dict_dir/cedict
+ wget -P $dict_dir/cedict http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz
+ gunzip $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz
+fi
+
+cat $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt | grep -v '#' | awk -F '/' '{print $1}' |\
+ perl -e '
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ print $A[1];
+ for($n = 2; $n < @A; $n++) {
+ $A[$n] =~ s:\[?([a-zA-Z0-9\:]+)\]?:$1:;
+ $tmp = uc($A[$n]);
+ print " $tmp";
+ }
+ print "\n";
+ }
+ ' | sort -k1 > $dict_dir/cedict/ch-dict.txt || exit 1;
+
+echo "--- Searching for Chinese OOV words ..."
+awk 'NR==FNR{words[$1]; next;} !($1 in words)' \
+ $dict_dir/cedict/ch-dict.txt $dict_dir/lexicon-ch/words-ch.txt |\
+ egrep -v '<.?s>' > $dict_dir/lexicon-ch/words-ch-oov.txt || exit 1;
+
+awk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+ $dict_dir/lexicon-ch/words-ch.txt $dict_dir/cedict/ch-dict.txt |\
+ egrep -v '<.?s>' > $dict_dir/lexicon-ch/lexicon-ch-iv.txt || exit 1;
+
+wc -l $dict_dir/lexicon-ch/words-ch-oov.txt
+wc -l $dict_dir/lexicon-ch/lexicon-ch-iv.txt
+
+
+# validate Chinese dictionary and compose a char-based
+# dictionary in order to get OOV pronunciations
+cat $dict_dir/cedict/ch-dict.txt |\
+ perl -e '
+ use utf8;
+ binmode(STDIN,":encoding(utf8)");
+ binmode(STDOUT,":encoding(utf8)");
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ $word_len = length($A[0]);
+ $proun_len = @A - 1 ;
+ if ($word_len == $proun_len) {print $_;}
+ }
+ ' > $dict_dir/cedict/ch-dict-1.txt || exit 1;
+
+# extract chars
+cat $dict_dir/cedict/ch-dict-1.txt | awk '{print $1}' |\
+ perl -e '
+ use utf8;
+ binmode(STDIN,":encoding(utf8)");
+ binmode(STDOUT,":encoding(utf8)");
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ @chars = split("", $A[0]);
+ foreach (@chars) {
+ print "$_\n";
+ }
+ }
+ ' | grep -v '^$' > $dict_dir/lexicon-ch/ch-char.txt || exit 1;
+
+# extract individual pinyins
+cat $dict_dir/cedict/ch-dict-1.txt |\
+ awk '{for(i=2; i<=NF; i++) print $i}' |\
+ perl -ape 's/ /\n/g;' > $dict_dir/lexicon-ch/ch-char-pinyin.txt || exit 1;
+
+# first make sure number of characters and pinyins
+# are equal, so that a char-based dictionary can
+# be composed.
+nchars=`wc -l < $dict_dir/lexicon-ch/ch-char.txt`
+npinyin=`wc -l < $dict_dir/lexicon-ch/ch-char-pinyin.txt`
+if [ $nchars -ne $npinyin ]; then
+ echo "Found $nchars chars and $npinyin pinyin. Please check!"
+ exit 1
+fi
+
+paste $dict_dir/lexicon-ch/ch-char.txt $dict_dir/lexicon-ch/ch-char-pinyin.txt > $dict_dir/lexicon-ch/ch-char-dict.tmp.txt || exit 1;
+
+# Add "4" and "7" to the character-pinyi pair since they are missing in the
+# mdbg dictionary.
+(echo '4 SI4'; echo '7 QI1'; ) | \
+ cat - $dict_dir/lexicon-ch/ch-char-dict.tmp.txt | sort -u > $dict_dir/lexicon-ch/ch-char-dict.txt || exit 1;
+
+rm $dict_dir/lexicon-ch/ch-char-dict.tmp.txt
+
+# create a multiple pronunciation dictionary
+cat $dict_dir/lexicon-ch/ch-char-dict.txt |\
+ perl -e '
+ my $prev = "";
+ my $out_line = "";
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ $cur = $A[0];
+ $cur_py = $A[1];
+ #print length($prev);
+ if (length($prev) == 0) { $out_line = $_; chomp($out_line);}
+ if (length($prev)>0 && $cur ne $prev) { print $out_line; print "\n"; $out_line = $_; chomp($out_line);}
+ if (length($prev)>0 && $cur eq $prev) { $out_line = $out_line."/"."$cur_py";}
+ $prev = $cur;
+ }
+ print $out_line;
+ ' > $dict_dir/lexicon-ch/ch-char-dict-mp.txt || exit 1;
+
+# get lexicon for Chinese OOV words
+local/create_oov_char_lexicon.pl $dict_dir/lexicon-ch/ch-char-dict-mp.txt \
+ $dict_dir/lexicon-ch/words-ch-oov.txt > $dict_dir/lexicon-ch/lexicon-ch-oov.txt || exit 1;
+
+# separate multiple prons for Chinese OOV lexicon
+cat $dict_dir/lexicon-ch/lexicon-ch-oov.txt |\
+ perl -e '
+ my @entry;
+ my @entry1;
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ @entry = ();
+ push(@entry, $A[0]);
+ for($i = 1; $i < @A; $i++ ) {
+ @py = split("/", $A[$i]);
+ @entry1 = @entry;
+ @entry = ();
+ for ($j = 0; $j < @entry1; $j++) {
+ for ($k = 0; $k < @py; $k++) {
+ $tmp = $entry1[$j]." ".$py[$k];
+ push(@entry, $tmp);
+ }
+ }
+ }
+ for ($i = 0; $i < @entry; $i++) {
+ print $entry[$i];
+ print "\n";
+ }
+ }
+ ' > $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt || exit 1;
+
+# compose IV and OOV lexicons for Chinese
+cat $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt $dict_dir/lexicon-ch/lexicon-ch-iv.txt |\
+ awk '{if (NF > 1 && $2 ~ /[A-Za-z0-9]+/) print $0;}' > $dict_dir/lexicon-ch/lexicon-ch.txt || exit 1;
+
+# convert Chinese pinyin to CMU format
+cat $dict_dir/lexicon-ch/lexicon-ch.txt | sed -e 's/U:/V/g' | sed -e 's/ R\([0-9]\)/ ER\1/g' | grep -v 'M2' |\
+ utils/pinyin_map.pl conf/pinyin2cmu > $dict_dir/lexicon-ch/lexicon-ch-cmu.txt || exit 1;
+
+# combine English and Chinese lexicons
+cat $dict_dir/lexicon-en/lexicon-en.txt $dict_dir/lexicon-ch/lexicon-ch-cmu.txt | awk 'NF>1' |\
+ sort -u > $dict_dir/lexicon1.txt || exit 1;
+
+cat $dict_dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}'| \
+ sort -u |\
+ perl -e '
+ my %ph_cl;
+ while (<STDIN>) {
+ $phone = $_;
+ chomp($phone);
+ chomp($_);
+ $phone =~ s:([A-Z]+)[0-9]:$1:;
+ if (exists $ph_cl{$phone}) { push(@{$ph_cl{$phone}}, $_) }
+ else { $ph_cl{$phone} = [$_]; }
+ }
+ foreach $key ( keys %ph_cl ) {
+ print "@{ $ph_cl{$key} }\n"
+ }
+ ' | sort -k1 > $dict_dir/nonsilence_phones.txt || exit 1;
+
+( echo SIL; echo SPN; echo NSN; echo LAU ) > $dict_dir/silence_phones.txt
+
+echo SIL > $dict_dir/optional_silence.txt
+
+# No "extra questions" in the input to this setup, as we don't
+# have stress or tone
+
+cat $dict_dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > $dict_dir/extra_questions.txt || exit 1;
+cat $dict_dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) {
+ $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \
+ >> $dict_dir/extra_questions.txt || exit 1;
+
+# Add to the lexicon the silences, noises etc.
+(echo '!SIL SIL'; echo '[VOCALIZED-NOISE] SPN'; echo '[VOCALIZEDNOISE] SPN';echo '[NOISE] NSN'; echo '[LAUGHTER] LAU';
+ echo '<UNK> SPN' ) | \
+ cat - $dict_dir/lexicon1.txt > $dict_dir/lexicon.txt || exit 1;
+
+echo "$0: Mandarin dict preparation succeeded"
+exit 0;
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_prepare_lm.sh b/egs/mandarin_bn_bc/s5/local/mandarin_prepare_lm.sh
new file mode 100755
index 00000000000..f9de16d78fb
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_prepare_lm.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+ngram_order=4
+oov_sym="<UNK>"
+no_uttid="false"
+prune_thres=1e-9
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: $0 [--no-uttid] [--ngram-order] [--oov-sym] [--prune-thres] <dict-dir> <local-text-dir> <lm-dir> <dev-dir>"
+ echo "E.g. $0 --no-uttid "true" --ngram-order 4 --oov-sym \"<UNK>\" --prune-thres "1e-9" data/local/dict data/local/train data/local/lm data/dev "
+ exit 1;
+fi
+
+dict_dir=$1
+local_text_dir=$2
+lm_dir=$3
+heldout=$4/text
+
+# check if sri is installed or no
+which ngram-count &>/dev/null
+if [[ $? == 0 ]]; then
+ echo "srilm installed"
+else
+ echo "Please install srilm first !"
+ exit 1
+fi
+echo "Building $ngram_order gram LM"
+[ ! -d $lm_dir ] && { mkdir -p $lm_dir || exit 1; }
+
+if [ ! -f $lm_dir/${ngram_order}gram-mincount/lm_pruned.gz ]; then
+ echo "Training LM with train text"
+ [ ! -f $local_text_dir/text ] && echo "No $local_text_dir/text" && exit 1;
+
+ # If the first column of $local_text_dir/text is uttid, we need to remove
+ # them.
+ if [ $no_uttid == "false" ]; then
+ awk '{i=$2;for (n=3;n<=NF;++n){i=i" "$n;}print i}' $local_text_dir/text > $lm_dir/text
+ else
+ cp $local_text_dir/text $lm_dir/text
+ fi
+ local/train_lms.sh --ngram-order $ngram_order --prune-thres $prune_thres $lm_dir $dict_dir $lm_dir $heldout
+fi
+
+
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_segment.py b/egs/mandarin_bn_bc/s5/local/mandarin_segment.py
new file mode 100755
index 00000000000..31fe135cf91
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_segment.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+#coding:utf-8
+#!/usr/bin/env python
+from __future__ import print_function
+import sys
+from mmseg import seg_txt
+for line in sys.stdin:
+ blks = str.split(line)
+ out_line = ""
+ for i in range(0, len(blks)):
+ if blks[i] == "[VOCALIZED-NOISE]" or blks[i] == "[NOISE]" or blks[i] == "[LAUGHTER]":
+ out_line += " " + blks[i]
+ continue
+ for j in seg_txt(blks[i]):
+ out_line += " " + j
+ print(out_line)
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_text_normalize.pl b/egs/mandarin_bn_bc/s5/local/mandarin_text_normalize.pl
new file mode 100644
index 00000000000..d587ce0f074
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_text_normalize.pl
@@ -0,0 +1,195 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+
+# This script removes special symbols from the transcriptions.
+
+while (<STDIN>) {
+ @A = split(" ", $_);
+ for ($n = 0; $n < @A; $n++) {
+ $a = $A[$n];
+ $tmp = $a;
+ $tmp =~ s:A:A:g;
+ $tmp =~ s:B:B:g;
+ $tmp =~ s:K:K:g;
+ $tmp =~ s:D:D:g;
+ $tmp =~ s:N:N:g;
+ $tmp =~ s:W:W:g;
+ $tmp =~ s:G:G:g;
+ $tmp =~ s:S:S:g;
+ $tmp =~ s:T:T:g;
+ $tmp =~ s:V:V:g;
+ $tmp =~ s:%::g;
+ $tmp =~ s:Ⅱ::g;
+ $tmp =~ s:+::g;
+ $tmp =~ s:-::g;
+ $tmp =~ s:.::g;
+ $tmp =~ s:0:0:g;
+ $tmp =~ s:1:1:g;
+ $tmp =~ s:2:2:g;
+ $tmp =~ s:3:3:g;
+ $tmp =~ s:4:4:g;
+ $tmp =~ s:5:5:g;
+ $tmp =~ s:6:6:g;
+ $tmp =~ s:7:7:g;
+ $tmp =~ s:8:8:g;
+ $tmp =~ s:9:9:g;
+ $tmp =~ s:;::g;
+ $tmp =~ s:<::g;
+ $tmp =~ s:>::g;
+ $tmp =~ s: ::g;
+ $tmp =~ s:、::g;
+ $tmp =~ s:】::g;
+ $tmp =~ s:·::g;
+ $tmp =~ s:〉::g;
+ $tmp =~ s:〈::g;
+ $tmp =~ s:《::g;
+ $tmp =~ s:》::g;
+ $tmp =~ s:"::g;
+ $tmp =~ s:‘::g;
+ $tmp =~ s:’::g;
+ $tmp =~ s:“::g;
+ $tmp =~ s:”::g;
+ $tmp =~ s::::g;
+ $tmp =~ s:(::g;
+ $tmp =~ s:)::g;
+ $tmp =~ s:…::g;
+ $tmp =~ s:!::g;
+ $tmp =~ s:\?::g;
+ $tmp =~ s:-::g;
+ $tmp =~ s:@::g;
+ $tmp =~ s:‰::g;
+ $tmp =~ s:—::g;
+ $tmp =~ s:○::g;
+ $tmp =~ s:,::g;
+ $tmp =~ s:・::g;
+ $tmp =~ s:;::g;
+ $tmp =~ s:\:::g;
+ $tmp =~ s:\(::g;
+ $tmp =~ s:\)::g;
+ $tmp =~ s:□::g;
+ $tmp =~ s: ::g;
+ $tmp =~ s:"::g;
+ $tmp =~ s:#::g;
+ $tmp =~ s:*::g;
+ $tmp =~ s:/::g;
+ $tmp =~ s:E::g;
+ $tmp =~ s:H::g;
+ $tmp =~ s:M::g;
+ $tmp =~ s:X::g;
+ $tmp =~ s:[::g;
+ $tmp =~ s:]::g;
+ $tmp =~ s:~::g;
+ $tmp =~ s: ̄::g;
+ $tmp =~ s:¥::g;
+ $tmp =~ s:?::g;
+ $tmp =~ s:。::g;
+ $tmp =~ s:!::g;
+ $tmp =~ s:,::g;
+ $tmp =~ s:§::g;
+ $tmp =~ s:¨::g;
+ $tmp =~ s:°::g;
+ $tmp =~ s:±::g;
+ $tmp =~ s:×::g;
+ $tmp =~ s:÷::g;
+ $tmp =~ s:ā::g;
+ $tmp =~ s:ǎ::g;
+ $tmp =~ s:ˉ::g;
+ $tmp =~ s:Ι::g;
+ $tmp =~ s:Υ::g;
+ $tmp =~ s:Φ::g;
+ $tmp =~ s:Χ::g;
+ $tmp =~ s:α::g;
+ $tmp =~ s:β::g;
+ $tmp =~ s:γ::g;
+ $tmp =~ s:ε::g;
+ $tmp =~ s:μ::g;
+ $tmp =~ s:π::g;
+ $tmp =~ s:ρ::g;
+ $tmp =~ s:τ::g;
+ $tmp =~ s:φ::g;
+ $tmp =~ s:χ::g;
+ $tmp =~ s:ψ::g;
+ $tmp =~ s:ω::g;
+ $tmp =~ s:А::g;
+ $tmp =~ s:Б::g;
+ $tmp =~ s:В::g;
+ $tmp =~ s:Г::g;
+ $tmp =~ s:Ж::g;
+ $tmp =~ s:З::g;
+ $tmp =~ s:И::g;
+ $tmp =~ s:Л::g;
+ $tmp =~ s:М::g;
+ $tmp =~ s:Н::g;
+ $tmp =~ s:О::g;
+ $tmp =~ s:П::g;
+ $tmp =~ s:С::g;
+ $tmp =~ s:Ш::g;
+ $tmp =~ s:Э::g;
+ $tmp =~ s:а::g;
+ $tmp =~ s:―::g;
+ $tmp =~ s:′::g;
+ $tmp =~ s:″::g;
+ $tmp =~ s:※::g;
+ $tmp =~ s:℃::g;
+ $tmp =~ s:Ⅰ::g;
+ $tmp =~ s:Ⅲ::g;
+ $tmp =~ s:Ⅳ::g;
+ $tmp =~ s:Ⅴ::g;
+ $tmp =~ s:Ⅵ::g;
+ $tmp =~ s:Ⅶ::g;
+ $tmp =~ s:Ⅷ::g;
+ $tmp =~ s:Ⅸ::g;
+ $tmp =~ s:Ⅹ::g;
+ $tmp =~ s:→::g;
+ $tmp =~ s:∏::g;
+ $tmp =~ s:√::g;
+ $tmp =~ s:∮::g;
+ $tmp =~ s:∶::g;
+ $tmp =~ s:≈::g;
+ $tmp =~ s:≤::g;
+ $tmp =~ s:≥::g;
+ $tmp =~ s:⊥::g;
+ $tmp =~ s:⌒::g;
+ $tmp =~ s:①::g;
+ $tmp =~ s:②::g;
+ $tmp =~ s:③::g;
+ $tmp =~ s:④::g;
+ $tmp =~ s:⑤::g;
+ $tmp =~ s:⑥::g;
+ $tmp =~ s:⑦::g;
+ $tmp =~ s:⑧::g;
+ $tmp =~ s:⑨::g;
+ $tmp =~ s:⑩::g;
+ $tmp =~ s:⑴::g;
+ $tmp =~ s:⒈::g;
+ $tmp =~ s:⒉::g;
+ $tmp =~ s:⒒::g;
+ $tmp =~ s:─::g;
+ $tmp =~ s:━::g;
+ $tmp =~ s:│::g;
+ $tmp =~ s:┄::g;
+ $tmp =~ s:┅::g;
+ $tmp =~ s:┘::g;
+ $tmp =~ s:┼::g;
+ $tmp =~ s:╃::g;
+ $tmp =~ s:■::g;
+ $tmp =~ s:△::g;
+ $tmp =~ s:◆::g;
+ $tmp =~ s:●::g;
+ $tmp =~ s:☆::g;
+ $tmp =~ s:〃::g;
+ $tmp =~ s:「::g;
+ $tmp =~ s:」::g;
+ $tmp =~ s:【::g;
+ $tmp =~ s:〓::g;
+ $tmp =~ s:〔::g;
+ $tmp =~ s:〕::g;
+ $tmp =~ s:〖::g;
+ $tmp =~ s:〗::g;
+ $tmp =~ s:ぃ::g;
+ if ($tmp =~ /[^.]{0,}\.+/) {$tmp =~ s:\.:点:g;}
+ if ($tmp =~ /[a-zA-Z]/) {$tmp=uc($tmp);}
+ print "$tmp ";
+ }
+ print "\n";
+}
diff --git a/egs/mandarin_bn_bc/s5/local/nnet3/run_ivector_common.sh b/egs/mandarin_bn_bc/s5/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..643167a4bae
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,163 @@
+#!/usr/bin/env bash
+
+set -e -o pipefail
+
+# This script is called from scripts like local/nnet3/run_tdnn.sh and
+# local/chain/run_tdnn.sh (and may eventually be called by more scripts). It
+# contains the common feature preparation and iVector-related parts of the
+# script. See those scripts for examples of usage.
+
+
+stage=0
+nj=80
+train_set=train_cleanup # you might set this to e.g. train.
+affix="_cleanup"
+gmm=tri6b_cleanup # This specifies a GMM-dir from the features of the type you're training the system on;
+ # it should contain alignments for 'train_set'.
+lang=data/lang_large_test
+num_threads_ubm=32
+num_processes=4
+nnet3_affix="_cleanup" # affix for exp/nnet3 directory to put iVector stuff
+ali_dir=exp/${gmm}_sp_ali
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+for f in data/${train_set}/feats.scp $gmm/final.mdl; do
+ if [ ! -f $f ]; then
+ echo "$0: expected file $f to exist"
+ exit 1
+ fi
+done
+
+if [ $stage -le 1 ]; then
+ mfccdir=mfcc_sp
+ #Although the nnet will be trained by high resolution data, we still have to
+ # perturb the normal data to get the alignment. _sp stands for speed-perturbed
+ echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+ echo "$0: making MFCC features for low-resolution speed-perturbed data"
+ steps/make_mfcc_pitch_online.sh --nj $nj --cmd "$train_cmd" \
+ data/${train_set}_sp exp/make_sp/${train_set}_sp $mfccdir || exit 1
+ steps/compute_cmvn_stats.sh data/${train_set}_sp exp/make_sp/${train_set}_sp $mfccdir
+ echo "$0: fixing input data-dir to remove nonexistent features, in case some "
+ echo ".. speed-perturbed segments were too short."
+ utils/fix_data_dir.sh data/${train_set}_sp
+fi
+
+if [ $stage -le 2 ]; then
+ if [ -f $ali_dir/ali.1.gz ]; then
+ echo "$0: alignments in $ali_dir appear to already exist. Please either remove them "
+ echo " ... or use a later --stage option."
+ exit 1
+ fi
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
+ data/${train_set}_sp $lang $gmm $ali_dir || exit 1
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: creating high-resolution MFCC features"
+
+ # this shows how you can split across multiple file-systems. we'll split the
+ # MFCC dir across multiple locations. You might want to be careful here, if you
+ # have multiple copies of Kaldi checked out and run the same recipe, not to let
+ # them overwrite each other.
+ mfccdir=mfcc_hires_sp
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/mandarin-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in ${train_set}_sp dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires
+
+ for datadir in ${train_set}_sp_hires dev_hires eval_hires; do
+ steps/make_mfcc_pitch_online.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir} exp/make_hires_sp/$datadir $mfccdir || exit 1
+ steps/compute_cmvn_stats.sh data/${datadir} exp/make_hires_sp/$datadir $mfccdir
+ utils/fix_data_dir.sh data/${datadir}
+
+ # make MFCC data dir without pitch to extract iVector
+ utils/data/limit_feature_dim.sh 0:39 data/${datadir} data/${datadir}_nopitch || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_nopitch exp/make_hires_sp/${datadir}_nopitch $mfccdir || exit 1;
+ done
+fi
+
+train_set=${train_set}_sp_hires_nopitch
+if [ $stage -le 3 ]; then
+ echo "Stage 3: train_set is $train_set"
+ echo "$0: computing a subset of data to train the diagonal UBM."
+ mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
+ temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
+
+ # train a diagonal UBM using a subset of about a quarter of the data
+ num_utts_total=$(wc -l max_prons:
+ values_sorted = sorted(values, key=lambda v:v[0], reverse=True)
+ values = values_sorted[:max_prons]
+ for v in values:
+ print(key, " ".join(v))
+
+
+
diff --git a/egs/mandarin_bn_bc/s5/local/rnnlm/run_tdnn_lstm_1a.sh b/egs/mandarin_bn_bc/s5/local/rnnlm/run_tdnn_lstm_1a.sh
new file mode 100644
index 00000000000..1b433890007
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/rnnlm/run_tdnn_lstm_1a.sh
@@ -0,0 +1,136 @@
+#!/usr/bin/env bash
+
+# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
+# 2018 Ke Li
+
+
+# Begin configuration section.
+
+dir=exp/rnnlm_lstm_1a
+embedding_dim=1024
+lstm_rpd=256
+lstm_nrpd=256
+stage=-10
+train_stage=-10
+epochs=4
+
+# variables for lattice rescoring
+run_lat_rescore=true
+run_nbest_rescore=true
+run_backward_rnnlm=false
+ac_model_dir=exp/chain_cleanup/tdnn_1d_sp
+decode_dir_suffix=rnnlm_1a
+ngram_order=4 # approximate the lattice-rescoring by limiting the max-ngram-order
+ # if it's set, it merges histories in the lattice if they share
+ # the same ngram history and this prevents the lattice from
+ # exploding exponentially
+pruned_rescore=true
+. path.sh
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+text=data/local/lm_large_4gram/train_text.gz
+lexicon=data/lang_large_test/words.txt
+text_dir=data/rnnlm/text
+mkdir -p $dir/config
+set -e
+for f in $lexicon; do
+ [ ! -f $f ] && \
+ echo "$0: expected file $f to exist; search for run.sh in run.sh" && exit 1
+done
+
+if [ $stage -le 0 ]; then
+ mkdir -p $text_dir
+ echo -n >$text_dir/dev.txt
+ # hold out one in every 2000 lines as dev data.
+ gunzip -c $text | cut -d ' ' -f2- | awk -v text_dir=$text_dir '{if(NR%2000 == 0) { print >text_dir"/dev.txt"; } else {print;}}' >$text_dir/mandarin.txt
+fi
+
+if [ $stage -le 1 ]; then
+ cp $lexicon $dir/config/
+ n=`cat $dir/config/words.txt | wc -l`
+ echo " $n" >> $dir/config/words.txt
+
+ # words that are not present in words.txt but are in the training or dev data, will be
+ # mapped to during training.
+ echo "" >$dir/config/oov.txt
+
+ cat > $dir/config/data_weights.txt <" \
+ --data-weights-file=$dir/config/data_weights.txt \
+ $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt
+
+ # choose features
+ rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \
+ --top-word-features=5000 \
+ --use-constant-feature=true \
+ --special-words=',,,,[VOCALIZED-NOISE],[NOISE],[LAUGHTER]' \
+ $dir/config/words.txt > $dir/config/features.txt
+
+ cat >$dir/config/xconfig < "
+ echo " Options:"
+ echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
+ echo " --stage (0|1|2) # start scoring script from part-way through."
+ echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)."
+ echo " --min_lmwt # minumum LM-weight for lattice rescoring "
+ echo " --max_lmwt # maximum LM-weight for lattice rescoring "
+ exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $dir/lat.1.gz $data/text; do
+ [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+
+ref_filtering_cmd="cat"
+[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter"
+[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter"
+hyp_filtering_cmd="cat"
+[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter"
+[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter"
+
+
+if $decode_mbr ; then
+ echo "$0: scoring with MBR, word insertion penalty=$word_ins_penalty"
+else
+ echo "$0: scoring with word insertion penalty=$word_ins_penalty"
+fi
+
+
+mkdir -p $dir/scoring_kaldi
+cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1;
+if [ $stage -le 0 ]; then
+
+ for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ mkdir -p $dir/scoring_kaldi/penalty_$wip/log
+
+ if $decode_mbr ; then
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \
+ acwt=\`perl -e \"print 1.0/LMWT\"\`\; \
+ lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+ lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
+ lattice-prune --beam=$beam ark:- ark:- \| \
+ lattice-mbr-decode --word-symbol-table=$symtab \
+ ark:- ark,t:- \| \
+ utils/int2sym.pl -f 2- $symtab \| \
+ $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1;
+
+ else
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \
+ lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+ lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
+ lattice-best-path --word-symbol-table=$symtab ark:- ark,t:- \| \
+ utils/int2sym.pl -f 2- $symtab \| \
+ $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1;
+ fi
+
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \
+ cat $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \
+ compute-wer --text --mode=present \
+ ark:$dir/scoring_kaldi/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1;
+
+ done
+fi
+
+
+# Starting at stage 2 is intentional, to allow nice coexistence with
+# score_kaldi.sh in case the user combines calls to these two scripts as
+# shown in the example at the top of the file. Otherwise the user would
+# have to filter the script parameters instead of simply forwarding them.
+if [ $stage -le 2 ] ; then
+ files=($dir/scoring_kaldi/test_filt.txt)
+ for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ for lmwt in $(seq $min_lmwt $max_lmwt); do
+ files+=($dir/scoring_kaldi/penalty_${wip}/${lmwt}.txt)
+ done
+ done
+
+ for f in "${files[@]}" ; do
+ fout=${f%.txt}.chars.txt
+ if [ -x local/character_tokenizer ]; then
+ cat $f | local/character_tokenizer > $fout
+ else
+ cat $f | perl -CSDA -ane '
+ {
+ print $F[0];
+ foreach $s (@F[1..$#F]) {
+ if (($s =~ /\[.*\]/) || ($s =~ /\<.*\>/) || ($s =~ "!SIL")) {
+ print " $s";
+ } else {
+ @chars = split "", $s;
+ foreach $c (@chars) {
+ print " $c";
+ }
+ }
+ }
+ print "\n";
+ }' > $fout
+ fi
+ done
+
+ for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.cer.LMWT.log \
+ cat $dir/scoring_kaldi/penalty_$wip/LMWT.chars.txt \| \
+ compute-wer --text --mode=present \
+ ark:$dir/scoring_kaldi/test_filt.chars.txt ark,p:- ">&" $dir/cer_LMWT_$wip || exit 1;
+ done
+fi
+
+if [ $stage -le 3 ] ; then
+ for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ for lmwt in $(seq $min_lmwt $max_lmwt); do
+ # adding /dev/null to the command list below forces grep to output the filename
+ grep WER $dir/cer_${lmwt}_${wip} /dev/null
+ done
+ done | utils/best_wer.sh >& $dir/scoring_kaldi/best_cer || exit 1
+
+ best_cer_file=$(awk '{print $NF}' $dir/scoring_kaldi/best_cer)
+ best_wip=$(echo $best_cer_file | awk -F_ '{print $NF}')
+ best_lmwt=$(echo $best_cer_file | awk -F_ '{N=NF-1; print $N}')
+
+ if [ -z "$best_lmwt" ]; then
+ echo "$0: we could not get the details of the best CER from the file $dir/cer_*. Probably something went wrong."
+ exit 1;
+ fi
+
+ if $stats; then
+ mkdir -p $dir/scoring_kaldi/cer_details
+ echo $best_lmwt > $dir/scoring_kaldi/cer_details/lmwt # record best language model weight
+ echo $best_wip > $dir/scoring_kaldi/cer_details/wip # record best word insertion penalty
+
+ $cmd $dir/scoring_kaldi/log/stats1.cer.log \
+ cat $dir/scoring_kaldi/penalty_$best_wip/${best_lmwt}.chars.txt \| \
+ align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.chars.txt ark:- ark,t:- \| \
+ utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/cer_details/per_utt \|\
+ utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/cer_details/per_spk || exit 1;
+
+ $cmd $dir/scoring_kaldi/log/stats2.cer.log \
+ cat $dir/scoring_kaldi/cer_details/per_utt \| \
+ utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \
+ sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/cer_details/ops || exit 1;
+
+ $cmd $dir/scoring_kaldi/log/cer_bootci.cer.log \
+ compute-wer-bootci --mode=present \
+ ark:$dir/scoring_kaldi/test_filt.chars.txt ark:$dir/scoring_kaldi/penalty_$best_wip/${best_lmwt}.chars.txt \
+ '>' $dir/scoring_kaldi/cer_details/cer_bootci || exit 1;
+
+ fi
+fi
+
+# If we got here, the scoring was successful.
+# As a small aid to prevent confusion, we remove all wer_{?,??} files;
+# these originate from the previous version of the scoring scripts.
+# We keep both rm statements here because leaving stale files around could
+# lead to confusion about the capabilities of the script.
+rm $dir/wer_{?,??} 2>/dev/null
+rm $dir/cer_{?,??} 2>/dev/null
+
+exit 0;
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_cleanup.sh b/egs/mandarin_bn_bc/s5/local/tdt_cleanup.sh
new file mode 100644
index 00000000000..6a2127fa20e
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_cleanup.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# This script removes non-speech segments, long music, or long silence from
+# the original speech recordings.
+
+nj=32
+stage=0
+cmd=run.pl
+. cmd.sh
+. path.sh
+. utils/parse_options.sh
+
+set -e -o pipefail
+if [ $# -ne 5 ]; then
+ echo "Usage: $0 "
+ echo "E.g., $0 [options] data/train data/lang exp/gale_mandarin data/train_clean"
+ exit 1;
+fi
+
+src_data_dir=$1
+lang_dir=$2
+mdldir=$3
+newdir=$4
+clean_data_dir=$5
+
+steps/cleanup/segment_long_utterances.sh --nj ${nj} --cmd "$train_cmd" --stage $stage \
+ --max-bad-proportion 0.6 $mdldir $lang_dir $src_data_dir \
+ $clean_data_dir $newdir || exit 1;
+
+echo "Clean up succeeded !"
+
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_bad_utts b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_bad_utts
new file mode 100644
index 00000000000..aba1e21e2a9
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_bad_utts
@@ -0,0 +1,3 @@
+19981223_0900_1000_VOA_MAN
+19981230_0900_1000_VOA_MAN
+20001226_2000_2025_CTS_MAN
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_audio.sh b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_audio.sh
new file mode 100755
index 00000000000..1226f2ce918
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_audio.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Copyright 2016 Johns Hopkins Univeersity (author: Jan "Yenda" Trmal)
+# Copyright 2019 Johns Hopkins Univeersity (author: Jinyi Yang)
+# Apache 2.0
+
+
+echo $0 "$@"
+
+tdtData=$(utils/make_absolute.sh "${@: -1}" );
+wavedir=$tdtData/wav
+mkdir -p $wavedir
+
+
+length=$(($#-1))
+args=${@:1:$length}
+
+# Check if sph2pipe is installed
+sph2pipe=`which sph2pipe` || sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
+[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1;
+set -e -o pipefail
+
+for var in $args; do
+  CD=$(basename $var)
+  [ -d $wavedir/$CD ] && rm -rf $wavedir/$CD
+  mkdir -p $wavedir/$CD
+  find $var -type f -name "*.sph" | grep "MAN" | while read file; do
+    f=$(basename $file)
+    if [[ ! -L "$wavedir/$CD/$f" ]]; then
+      ln -sf $file $wavedir/$CD/$f
+    fi
+  done
+done
+
+#figure out the proper sph2pipe command line
+(
+  for w in `find $wavedir -name "*.sph"` ; do
+    base=`basename $w .sph`
+    fullpath=`utils/make_absolute.sh $w`
+    echo "$base $sph2pipe -f wav -p -c 1 $fullpath |"
+  done
+) | sort -u > $tdtData/wav.scp
+
+#clean
+rm -fr $tdtData/id$$ $tdtData/wav$$
+echo "$0: data prep audio succeeded"
+
+exit 0
+
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_filter.sh b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_filter.sh
new file mode 100644
index 00000000000..cb1f4feca7a
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_filter.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 (author: Jinyi Yang)
+# Apache 2.0
+
+# This script removes bad utterances from the TDT corpus.
+. path.sh
+. ./utils/parse_options.sh
+if [ $# != 2 ]; then
+ echo "Usage: $0 [options] ";
+ echo "e.g.: $0 TDT2 data/local/tdt2"
+ exit 1;
+fi
+
+set -e -o pipefail
+
+tdtdir=$1
+tgtdir=$2
+mkdir -p $tgtdir
+
+
+for f in "text" "utt2spk" "segments" "uttid"; do
+ cat $tdtdir/txt/$f | grep -v -F -f local/tdt_mandarin_bad_utts > $tgtdir/$f
+done
+
+awk 'NR==FNR{a[$2];next} $1 in a{print $0}' $tgtdir/segments $tdtdir/wav.scp | \
+grep -v -F -f local/tdt_mandarin_bad_utts > $tgtdir/wav.scp
+
+utils/utt2spk_to_spk2utt.pl $tgtdir/utt2spk | sort -u > $tgtdir/spk2utt
+
+echo "TDT data prepare succeeded !"
+
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_txt.sh b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_txt.sh
new file mode 100755
index 00000000000..4fd04acaaaa
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_txt.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+. ./path.sh || exit 1;
+
+echo $0 "$@"
+export LC_ALL=C
+
+tdtData=$(utils/make_absolute.sh "${@: -1}" );
+
+length=$(($#-1))
+args=${@:1:$length}
+
+top_pwd=`pwd`
+txtdir=$tdtData/txt
+sph_scp=$tdtData/wav.scp
+mkdir -p $txtdir
+
+cd $txtdir
+
+for cdx in ${args[@]}; do
+  echo "Preparing $cdx"
+  if [[ $cdx == *.tgz ]] ; then
+    tar -zxf $cdx
+  elif [ -d "$cdx" ]; then
+    tgt=$(basename $cdx)
+    zfile=`find $cdx -type f -name "*.tgz"`
+    if [ ! -z "$zfile" ]; then
+      test -x $tgt || mkdir $tgt
+      cd $tgt
+      tar -zxf $zfile
+      cd $txtdir
+    else
+      test -x $tgt || ln -s $cdx `basename $tgt`
+    fi
+  else
+    echo "I don't really know what I shall do with $cdx " >&2
+  fi
+done
+
+# There are more transcriptions than audio files. We only use the
+# transcriptions which have corresponding audio files.
+find -L $txtdir -type f -name "*.src_sgm" | grep "MAN" | \
+  awk 'NR==FNR {a[$1];next}; {name=$0;gsub(".src_sgm$", "", name); gsub(".*/", "", name); \
+  if (name in a) print $0}' $sph_scp - | sort > $txtdir/trans.flist || exit 1;
+
+perl $top_pwd/local/tdt_mandarin_parse_sgm.pl $txtdir/trans.flist > $txtdir/text.tmp || exit 1;
+cd $top_pwd
+
+cut -d " " -f1 $txtdir/text.tmp > $txtdir/uttid
+cut -d " " -f2- $txtdir/text.tmp > $txtdir/trans
+
+pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'`
+export PYTHONPATH=$PYTHONPATH:`pwd`/tools/mmseg-1.3.0/lib/python${pyver}/site-packages
+if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "--- Downloading mmseg-1.3.0 ..."
+ echo "NOTE: it assumes that you have Python, Setuptools installed on your system!"
+ wget -P tools http://pypi.python.org/packages/source/m/mmseg/mmseg-1.3.0.tar.gz
+ tar xf tools/mmseg-1.3.0.tar.gz -C tools
+ cd tools/mmseg-1.3.0
+ mkdir -p lib/python${pyver}/site-packages
+ CC=gcc CXX=g++ python setup.py build
+ python setup.py install --prefix=.
+ cd ../..
+ if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "mmseg is not found - installation failed?"
+ exit 1
+ fi
+fi
+# Create text, use mmseg for splitting Mandarin characters into words.
+cat $txtdir/trans |\
+ sed -e 's/,//g' | \
+ sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\
+ perl local/mandarin_text_normalize.pl |\
+ python local/mandarin_segment.py |\
+ sed -e 's/THISISSPKTURN//g' |\
+ paste $txtdir/uttid - |\
+ awk '{if (NF>2 || (NF==2 && $2 != "")) print $0}' > $txtdir/text_with_spk_turn
+
+# The text_with_spk_turn file contains label "" to indicate speaker
+# switching, in case the speaker diarization process is required. We do not use
+# speaker diarization at this moment, so the spk id will be the segment
+# (utterance)
+
+cat $txtdir/text_with_spk_turn | sed 's///g' > $txtdir/text
+awk '{print $1" "$1}' $txtdir/text_with_spk_turn > $txtdir/utt2spk
+cp $txtdir/utt2spk $txtdir/spk2utt
+
+awk '{segments=$1; split(segments, S, "_"); uttid=S[1];for (i=2;i<=5;++i) uttid=uttid"_"S[i]; print segments " " uttid " " S[7]/100 " " S[8]/100}' < $txtdir/text > $txtdir/segments
+
+awk '{print $1}' $txtdir/text > $txtdir/uttid
+
+echo "TDT Mandarin text preparation succeeded!"
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_parse_sgm.pl b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_parse_sgm.pl
new file mode 100755
index 00000000000..3889aa02616
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_parse_sgm.pl
@@ -0,0 +1,170 @@
+#!/usr/bin/env perl
+
+#===============================================================================
+# Copyright (c) 2019 Johns Hopkins University (Author: Jinyi Yang)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Encode;
+use Time::Piece;
+
+require HTML::Parser or die "This script needs HTML::Parser from CPAN";
+HTML::Parser->import();
+
+binmode(STDOUT, ":utf8");
+
+sub trim { my $s = shift; $s =~ s/^\s+|\s+$//g; return $s };
+
+sub get_doc_no {
+ my $tag = shift(@_);
+ my @tmpdoc = split /\s+/, $tag;
+ my @doc_nos = split /\./, $tmpdoc[1];
+ return @doc_nos;
+}
+
+sub check_doc_type {
+ my $tag = shift(@_);
+ if ( $tag =~ /UNTRANSCRIBED/){
+ return 0;
+ ;
+ } else {
+ return 1;
+ }
+}
+
+sub str2time {
+ my ($str) = @_;
+ $str =~ s/(\.[0-9]+)?\z//;
+ my $fraction = $1 || 0;
+ return Time::Piece->strptime($str, '%H:%M:%S')->epoch + $fraction;
+}
+
+sub get_time_tag{
+ my $start = shift(@_);
+ my $end = shift(@_);
+ if (($start ne "") && ($end ne "")) {
+ $start = sprintf("%.2f", $start);
+ $end = sprintf("%.2f", $end);
+ my $tag = sprintf("%06.0f_%06.0f", 100*$start+0.5, 100*$end+0.5);
+ return $tag;
+ ;
+ } else{
+ print STDERR "$0: Empty time tag: $start or $end\n";
+ return "";
+ }
+}
+
+if (@ARGV != 1) {
+ print STDERR "$0: This script needs exactly one parameter (list of SGML files)\n";
+ print STDERR " Usage: $0 \n";
+ print STDERR " where\n";
+ print STDERR " is a file containing the official SGML format\n";
+ print STDERR " transcripts. The files are parsed and the parsed representation\n";
+ print STDERR " is dumped to STDOUT (one utterance + the additional data fields\n";
+ print STDERR " per line (we dump all the fields, but not all fields are used\n";
+ print STDERR " in the recipe).\n";
+ die;
+}
+my $filelist=$ARGV[0];
+
+my $p = HTML::Parser->new();
+
+my @files=();
+open(F, '<', $filelist) or die "Could not open file $filelist: $?\n";
+while() {
+ chomp;
+ push @files, $_;
+}
+
+foreach my $file (@files) {
+ my $filename = "";
+ my $docname = "";
+ my $doctype = "";
+ my @docno = ();
+ my $doc_id = "";
+ my @text = ();
+ my $start_time = 0;
+ my $end_time = 0;
+ my $doc_start_time = 0;
+ my $current_time = 0;
+ my @times = ();
+
+ my $sgml_file = `basename $file`;
+ $sgml_file = trim $sgml_file;
+ $sgml_file =~ s/\.src_sgm$//g;
+ my @sgml_file_ids = split '_', $sgml_file;
+ my $sgml_file_id = $sgml_file_ids[3].$sgml_file_ids[0].$sgml_file_ids[1];
+
+ open(my $f, '<:encoding(iso-8859-1)', $file) or die "Could not open file $file: $?\n";
+ while(my $line = <$f>) {
+ $line = trim $line;
+ next unless $line;
+
+ if ($line =~ //) {
+ @docno = get_doc_no $line;
+ $doc_id = $docno[0].$docno[1];
+ $doc_id = $docno[2]; # Four digits
+ ;
+ } elsif($line =~ // ){
+ @times = split /\s+/, $line;
+ $current_time = str2time($times[2]);
+ if ($doc_start_time == 0){
+ $doc_start_time = $current_time;
+ $start_time = 0;
+ ;
+ } else {
+ $start_time = $current_time - $doc_start_time;
+ }
+ ;
+ } elsif ($line =~ //){
+ $line = "THISISSPKTURN"; # Replace with a word, indicating speaker change, will be removed from text before LM training
+ push @text, $line;
+ ;
+ } elsif($line =~ //){
+ @times = split /\s+/, $line;
+ $end_time = str2time($times[2]) - $doc_start_time;
+ ;
+ } elsif ($line =~ //) {
+ $doctype = check_doc_type $line;
+ ;
+ } elsif ($line eq "<\/DOC>") {
+ if ((@text > 0) && ($doctype)) {
+ if ($end_time <= $start_time){
+ print STDERR "$0: WARNING: File $file has invalid time tag at $doc_id\n";
+ }
+ my $time_tag = get_time_tag($start_time, $end_time);
+ $docname = $sgml_file."_".$doc_id."_".$time_tag;
+ print "$docname ";
+ print join(" ", @text) . "\n";
+ }
+ $docname = "";
+ @text = ();
+ ;
+ } elsif ($line !~ "<") {
+ $line = trim $line;
+ $line = decode("gbk", $line);
+ $line =~ s:〈turn〉:THISISSPKTURN:g;
+ $line =~ s::THISISSPKTURN:g;
+ $line =~ s:turn>:THISISSPKTURN:g;
+ $line =~ s:"
+prune_thres=1e-9
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: [--ngram-order] [--prune-thres] "
+ echo "E.g. $0 --ngram-order 4 --prune-thres 1e-9 data/local/train data/local/dict
+ data/local/lm_no_extra datal/local/dev/text"
+ exit 1
+fi
+
+text=$1/text
+dict_dir=$2
+dir=$3
+dev_text=$4
+
+
+mkdir -p $dir || exit 1;
+[ ! -f $text ] && echo "$0: No such file $text" && exit 1;
+
+lexicon=$dict_dir/lexicon.txt
+[ ! -f $lexicon ] && echo "$0: No such file $lexicon" && exit 1;
+
+
+cleantext=$dir/text.no_oov
+
+cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } }
+ {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ",$n);} } printf("\n");}' \
+ > $cleantext || exit 1;
+
+
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | sort | uniq -c | \
+ sort -nr > $dir/word.counts || exit 1;
+
+
+# Get counts from acoustic training transcripts, and add one-count
+# for each word in the lexicon (but not silence, we don't want it
+# in the LM-- we'll add it optionally later).
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | \
+ cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
+ sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
+
+cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map \
+ || exit 1;
+
+cat $dir/word_map | awk '{print $1}' | cat - <(echo ""; echo "" ) \
+ > $dir/wordlist
+
+ngram-count -text $dir/text.no_oov -order $ngram_order -limit-vocab -vocab $dir/wordlist -unk \
+ -map-unk "" -kndiscount -interpolate -prune $prune_thres -lm $dir/srilm.o${ngram_order}g.kn.gz
+
+cut -d " " -f2- $dev_text > $dir/heldout
+ngram -lm $dir/srilm.o${ngram_order}g.kn.gz -ppl $dir/heldout > $dir/ppl
+# note: output is
+# $dir/${ngram_order}gram-mincount/lm_unpruned.gz
+echo train lm succeeded
diff --git a/egs/mandarin_bn_bc/s5/path.sh b/egs/mandarin_bn_bc/s5/path.sh
new file mode 100644
index 00000000000..e875e4b585c
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=$(pwd)/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/kaldi_lm:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+. $KALDI_ROOT/tools/env.sh
+export LC_ALL=C
diff --git a/egs/mandarin_bn_bc/s5/rnnlm b/egs/mandarin_bn_bc/s5/rnnlm
new file mode 120000
index 00000000000..e136939ba72
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/rnnlm
@@ -0,0 +1 @@
+../../../scripts/rnnlm/
\ No newline at end of file
diff --git a/egs/mandarin_bn_bc/s5/run.sh b/egs/mandarin_bn_bc/s5/run.sh
new file mode 100644
index 00000000000..da183636b7c
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/run.sh
@@ -0,0 +1,278 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+train_nj=80
+decode_nj=60
+stage=-1
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh
+
+GALE_AUDIO=(
+ /export/corpora/LDC/LDC2013S08/
+ /export/corpora/LDC/LDC2013S04/
+ /export/corpora/LDC/LDC2014S09/
+ /export/corpora/LDC/LDC2015S06/
+ /export/corpora/LDC/LDC2015S13/
+ /export/corpora/LDC/LDC2016S03/
+ /export/corpora/LDC/LDC2017S25/
+)
+GALE_TEXT=(
+ /export/corpora/LDC/LDC2013T20/
+ /export/corpora/LDC/LDC2013T08/
+ /export/corpora/LDC/LDC2014T28/
+ /export/corpora/LDC/LDC2015T09/
+ /export/corpora/LDC/LDC2015T25/
+ /export/corpora/LDC/LDC2016T12/
+ /export/corpora/LDC/LDC2017T18/
+)
+
+TDT_AUDIO=(
+ /export/corpora/LDC/LDC2001S93/
+ /export/corpora/LDC/LDC2001S95/
+ /export/corpora/LDC/LDC2005S11/
+)
+TDT_TEXT=(
+ /export/corpora/LDC/LDC2001T57/
+ /export/corpora/LDC/LDC2001T58/
+ /export/corpora5/LDC/LDC2005T16/
+)
+
+GIGA_TEXT=/export/corpora/LDC/LDC2003T09/gigaword_man/xin/
+
+galeData=GALE/
+tdtData=TDT/
+gigaData=GIGA/
+
+set -e -o pipefail
+set -x
+
+########################### Data preparation ###########################
+if [ $stage -le 0 ]; then
+ echo "`date -u`: Prepare data for GALE"
+ local/gale_data_prep_audio.sh "${GALE_AUDIO[@]}" $galeData
+ local/gale_data_prep_txt.sh "${GALE_TEXT[@]}" $galeData
+ local/gale_data_prep_split.sh $galeData data/local/gale
+
+ echo "`date -u`: Prepare data for TDT"
+ local/tdt_mandarin_data_prep_audio.sh "${TDT_AUDIO[@]}" $tdtData
+ local/tdt_mandarin_data_prep_txt.sh "${TDT_TEXT[@]}" $tdtData
+ local/tdt_mandarin_data_prep_filter.sh $tdtData data/local/tdt_mandarin
+
+ ## Merge transcripts from GALE and TDT for lexicon and LM training
+ mkdir -p data/local/gale_tdt_train
+ cat data/local/gale/train/text data/local/tdt_mandarin/text > data/local/gale_tdt_train/text
+fi
+
+########################### Lexicon preparation ########################
+if [ $stage -le 1 ]; then
+ echo "`date -u`: Prepare dictionary for GALE and TDT"
+ local/mandarin_prepare_dict.sh data/local/dict_gale_tdt data/local/gale_tdt_train
+ local/check_oov_rate.sh data/local/dict_gale_tdt/lexicon.txt \
+ data/local/gale_tdt_train/text > data/local/gale_tdt_train/oov.rate
+ grep "rate" data/local/gale_tdt_train/oov.rate |\
+ awk '$10>0{print "Warning: OOV rate is "$10 ", make sure it is a small number"}'
+ utils/prepare_lang.sh data/local/dict_gale_tdt "" data/local/lang_gale_tdt data/lang_gale_tdt
+fi
+
+########################### LM preparation for GALE ####################
+if [ $stage -le 2 ]; then
+ echo "`date -u`: Creating LM for GALE"
+ local/mandarin_prepare_lm.sh --no-uttid "false" --ngram-order 4 --oov-sym "" --prune_thres "1e-9" \
+ data/local/dict_gale_tdt data/local/gale/train data/local/gale/train/lm_4gram data/local/gale/dev
+ local/mandarin_format_lms.sh data/local/gale/train/lm_4gram/srilm.o4g.kn.gz \
+ data/lang_gale_tdt data/lang_gale_test
+fi
+
+############# Using GALE data to train cleaning up model for TDT #######
+datadir=data/gale
+mfccdir=mfcc/gale
+expdir=exp/gale
+if [ $stage -le 3 ]; then
+ # spread the mfccs over various machines, as this data-set is quite large.
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
+ mfcc=$(basename $mfccdir) # in case was absolute pathname (unlikely), get basename.
+ utils/create_split_dir.pl /export/b{05,06,07,08}/$USER/kaldi-data/egs/gale_asr/s5/$mfcc/storage \
+ $mfccdir/storage
+ fi
+ echo "`date -u`: Extracting GALE MFCC features"
+ for x in train dev eval; do
+ steps/make_mfcc_pitch.sh --cmd "$train_cmd" --nj $train_nj \
+ $datadir/$x exp/make_mfcc/gale/$x $mfccdir
+ utils/fix_data_dir.sh $datadir/$x # some files fail to get mfcc for many reasons
+ steps/compute_cmvn_stats.sh $datadir/$x exp/make_mfcc/gale/$x $mfccdir
+ done
+# Let's create small subsets to make quick flat-start training:
+# train_100k contains about 150 hours of data.
+ utils/subset_data_dir.sh $datadir/train 100000 $datadir/train_100k || exit 1;
+ utils/subset_data_dir.sh --shortest $datadir/train_100k 2000 $datadir/train_2k_short || exit 1;
+ utils/subset_data_dir.sh $datadir/train_100k 5000 $datadir/train_5k || exit 1;
+ utils/subset_data_dir.sh $datadir/train_100k 10000 $datadir/train_10k || exit 1;
+fi
+
+########################### Monophone training #########################
+if [ $stage -le 4 ]; then
+  echo "`date -u`: Monophone training with GALE data"
+ steps/train_mono.sh --boost-silence 1.25 --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train_2k_short data/lang_gale_tdt $expdir/mono || exit 1;
+fi
+
+########################### Tri1 training ##############################
+if [ $stage -le 5 ]; then
+ steps/align_si.sh --boost-silence 1.25 --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train_5k data/lang_gale_tdt $expdir/mono $expdir/mono_ali_5k || exit 1;
+  echo "`date -u`: Tri1 training with GALE data"
+ # train tri1 [first triphone pass]
+ steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
+ 2000 10000 $datadir/train_5k data/lang_gale_tdt $expdir/mono_ali_5k $expdir/tri1 || exit 1;
+ utils/mkgraph.sh data/lang_gale_test $expdir/tri1 $expdir/tri1/graph_gale_test || exit 1;
+ steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ $expdir/tri1/graph_gale_test $datadir/dev $expdir/tri1/decode_gale_dev
+fi
+
+########################### Tri2b training #############################
+if [ $stage -le 6 ]; then
+ steps/align_si.sh --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train_10k data/lang_gale_tdt $expdir/tri1 $expdir/tri1_ali_10k || exit 1;
+  echo "`date -u`: Tri2b training with GALE data"
+ steps/train_lda_mllt.sh --cmd "$train_cmd" \
+ --splice-opts "--left-context=3 --right-context=3" 2500 15000 \
+ $datadir/train_10k data/lang_gale_tdt $expdir/tri1_ali_10k $expdir/tri2b
+ utils/mkgraph.sh data/lang_gale_test $expdir/tri2b $expdir/tri2b/graph_gale_test || exit 1;
+ steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ $expdir/tri2b/graph_gale_test $datadir/dev $expdir/tri2b/decode_gale_dev
+fi
+
+########################### Tri3b training #############################
+if [ $stage -le 7 ]; then
+  steps/align_si.sh --nj $train_nj --cmd "$train_cmd" --use-graphs true \
+    $datadir/train_10k data/lang_gale_tdt $expdir/tri2b $expdir/tri2b_ali_10k || exit 1;
+  echo "`date -u`: Tri3b training with GALE data"
+  steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
+    $datadir/train_10k data/lang_gale_tdt $expdir/tri2b_ali_10k $expdir/tri3b
+  utils/mkgraph.sh data/lang_gale_test $expdir/tri3b $expdir/tri3b/graph_gale_test || exit 1;
+  steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
+    $expdir/tri3b/graph_gale_test $datadir/dev $expdir/tri3b/decode_gale_dev
+fi
+
+########################### Tri4b training #############################
+if [ $stage -le 8 ]; then
+ steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train_100k data/lang_gale_tdt \
+ $expdir/tri3b $expdir/tri3b_ali_100k || exit 1;
+  echo "`date -u`: Tri4b training with GALE data"
+ steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \
+ $datadir/train_100k data/lang_gale_tdt \
+ $expdir/tri3b_ali_100k $expdir/tri4b
+ utils/mkgraph.sh data/lang_gale_test $expdir/tri4b $expdir/tri4b/graph_gale_test || exit 1;
+ steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
+ $expdir/tri4b/graph_gale_test $datadir/dev $expdir/tri4b/decode_gale_dev
+fi
+
+######################### Re-create lang directory######################
+# We want to add pronunciation probabilities to lexicon, using the previously trained model.
+if [ $stage -le 9 ]; then
+ steps/get_prons.sh --cmd "$train_cmd" \
+ $datadir/train_100k data/lang_gale_tdt $expdir/tri4b
+ utils/dict_dir_add_pronprobs.sh --max-normalize true \
+ data/local/dict_gale_tdt \
+ $expdir/tri4b/pron_counts_nowb.txt $expdir/tri4b/sil_counts_nowb.txt \
+ $expdir/tri4b/pron_bigram_counts_nowb.txt data/local/dict_gale_tdt_reestimated
+ utils/prepare_lang.sh data/local/dict_gale_tdt_reestimated \
+ "" data/local/lang_gale_tdt_reestimated data/lang_gale_tdt_reestimated
+ local/mandarin_format_lms.sh data/local/gale/train/lm_4gram/srilm.o4g.kn.gz \
+ data/lang_gale_tdt_reestimated data/lang_gale_tdt_reestimated_test
+fi
+
+######################### Train tri5b with all GALE data ###############
+if [ $stage -le 10 ]; then
+ steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train data/lang_gale_tdt_reestimated \
+ $expdir/tri4b $expdir/tri4b_ali_train || exit 1;
+
+ steps/train_sat.sh --cmd "$train_cmd" 5000 100000 \
+ $datadir/train data/lang_gale_tdt_reestimated \
+ $expdir/tri4b_ali_train $expdir/tri5b || exit 1;
+fi
+
+if [ $stage -le 11 ]; then
+ echo "Clean up TDT data"
+ mkdir -p data/tdt || exit 1;
+ mfccdir=mfcc/tdt
+ cp -r data/local/tdt_mandarin/* data/tdt
+ steps/make_mfcc_pitch.sh --cmd "$train_cmd" --nj $train_nj \
+ data/tdt exp/make_mfcc/tdt $mfccdir
+ utils/fix_data_dir.sh data/tdt # some files fail to get mfcc for many reasons
+ steps/compute_cmvn_stats.sh data/tdt exp/make_mfcc/tdt $mfccdir
+ local/tdt_cleanup.sh --nj $train_nj data/tdt data/lang_gale_tdt_reestimated \
+ $expdir/tri5b $expdir/tri5b_tdt_cleanup data/tdt_cleanup
+ sed -i 's///g' data/tdt_cleanup/text
+ steps/compute_cmvn_stats.sh data/tdt_cleanup exp/make_mfcc/tdt_cleanup ${mfccdir}_cleanup
+fi
+
+datadir=data/train_gale_tdt_cleanup
+expdir=exp
+if [ $stage -le 12 ]; then
+ echo "Combine GALE and TDT cleaned"
+ utils/combine_data.sh \
+ $datadir data/gale/train data/tdt_cleanup
+
+ steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" \
+ $datadir data/lang_gale_tdt_reestimated \
+ exp/gale/tri5b exp/gale/tri5b_ali_gale_tdt_cleanup || exit 1;
+
+ steps/train_quick.sh --cmd "$train_cmd" \
+ 7000 150000 $datadir data/lang_gale_tdt_reestimated \
+ exp/gale/tri5b_ali_gale_tdt_cleanup exp/tri6b_cleanup
+ utils/mkgraph.sh data/lang_gale_tdt_reestimated_test exp/tri6b_cleanup \
+ exp/tri6b_cleanup/graph_gale_tdt_reestimated_test || exit 1;
+ steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
+ exp/tri6b_cleanup/graph_gale_tdt_reestimated_test data/gale/dev exp/tri6b_cleanup/decode_gale_dev
+fi
+
+if [ $stage -le 13 ]; then
+ echo "Expand the lexicon with Gigaword"
+ local/gigaword_prepare.sh $GIGA_TEXT $gigaData
+ local/mandarin_prepare_dict.sh data/local/dict_giga_man_simp data/local/giga_man_simp
+ utils/prepare_lang.sh data/local/dict_giga_man_simp "" \
+ data/local/lang_giga_man_simp data/lang_giga_man_simp
+ # Merge the previous dictionary with GIGAWORD dictionary
+ local/mandarin_merge_dict.sh data/local/dict_gale_tdt_reestimated data/local/dict_giga_man_simp data/local/dict_large
+ # Prune the lexicon for multi-pronunciation words
+ python3 local/prune_lex.py data/local/dict_large/lexiconp.txt | \
+ sort > data/local/dict_large/lexiconp.tmp
+ mv data/local/dict_large/lexiconp.tmp data/local/dict_large/lexiconp.txt
+ utils/prepare_lang.sh data/local/dict_large "" \
+ data/local/lang_large data/lang_large
+fi
+
+
+if [ $stage -le 14 ]; then
+ echo "Prepare LM with all data"
+ # Train LM with GALE + TDT
+ local/mandarin_prepare_lm.sh --no-uttid "false" --ngram-order 4 --oov-sym "" --prune_thres "1e-9" \
+ data/local/dict_large data/local/gale_tdt_train data/local/gale_tdt_lm_4gram data/local/gale/dev
+
+ # Train LM with gigaword
+ local/mandarin_prepare_lm.sh --no-uttid "true" --ngram-order 4 --oov-sym "" --prune_thres "1e-9" \
+ data/local/dict_large GIGA/ data/local/giga_lm_4gram data/local/gale/dev
+
+ # LM interpolation
+ local/mandarin_mix_lm.sh --ngram-order 4 --oov-sym "" --prune-thres "1e-9" \
+ data/local/gale_tdt_lm_4gram data/local/giga_lm_4gram data/local/lm_large_4gram data/local/gale/dev
+ local/mandarin_format_lms.sh data/local/lm_large_4gram/srilm.o4g.kn.gz \
+ data/lang_large data/lang_large_test
+fi
+
+# From here, we train a tdnnf model. You should modify the related directories
+# in this script, and in local/nnet3/run_ivector_common.sh
+local/chain/run_tdnn.sh
+
+# We use all GALE+TDT+GIGAWORD text to train RNNLM
+cat local/gale_tdt_lm_4gram/text data/local/giga_lm_4gram/text | gzip > data/local/lm_large_4gram/train_text.gz
+# Train RNNLM. You should modify the related directories in this script.
+local/rnnlm/run_tdnn_lstm_1a.sh
+
diff --git a/egs/mandarin_bn_bc/s5/steps b/egs/mandarin_bn_bc/s5/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/mandarin_bn_bc/s5/utils b/egs/mandarin_bn_bc/s5/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file
diff --git a/egs/material/s5/local/chain/decode_test.sh b/egs/material/s5/local/chain/decode_test.sh
index 40115a04cf6..d35ee5fd994 100755
--- a/egs/material/s5/local/chain/decode_test.sh
+++ b/egs/material/s5/local/chain/decode_test.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (author: Daniel Povey)
# 2018 Mahsa Yarmohammadi
diff --git a/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh
index 4f38ee886a7..533ca2a2fc5 100755
--- a/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh
index 023cb34b43d..d81e23547ba 100755
--- a/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index af5a62dad0d..711bd593ae2 100755
--- a/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/material/s5/local/g2p/apply_g2p.sh b/egs/material/s5/local/g2p/apply_g2p.sh
index 704a1a906bb..66cd52a91e1 100755
--- a/egs/material/s5/local/g2p/apply_g2p.sh
+++ b/egs/material/s5/local/g2p/apply_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# 2017 Xiaohui Zhang
diff --git a/egs/material/s5/local/g2p/train_g2p.sh b/egs/material/s5/local/g2p/train_g2p.sh
index 43e75f6608d..af6072baef7 100755
--- a/egs/material/s5/local/g2p/train_g2p.sh
+++ b/egs/material/s5/local/g2p/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Intellisist, Inc. (Author: Navneeth K)
# 2017 Xiaohui Zhang
diff --git a/egs/material/s5/local/nnet3/run_ivector_common.sh b/egs/material/s5/local/nnet3/run_ivector_common.sh
index a56b3bf67d8..3471834bb75 100755
--- a/egs/material/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/material/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/material/s5/local/prepare_audio_data.sh b/egs/material/s5/local/prepare_audio_data.sh
index 2bf9283f435..ee65d0e47ea 100755
--- a/egs/material/s5/local/prepare_audio_data.sh
+++ b/egs/material/s5/local/prepare_audio_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/material/s5/local/prepare_dict.sh b/egs/material/s5/local/prepare_dict.sh
index 710f1a66e2e..123d9615244 100755
--- a/egs/material/s5/local/prepare_dict.sh
+++ b/egs/material/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/material/s5/local/prepare_text_data.sh b/egs/material/s5/local/prepare_text_data.sh
index 4200a55ed9d..52daa434f87 100755
--- a/egs/material/s5/local/prepare_text_data.sh
+++ b/egs/material/s5/local/prepare_text_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/material/s5/local/preprocess_external_text.sh b/egs/material/s5/local/preprocess_external_text.sh
index 4cbc457310e..83e6988a6f8 100755
--- a/egs/material/s5/local/preprocess_external_text.sh
+++ b/egs/material/s5/local/preprocess_external_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
set -e -o pipefail
diff --git a/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh b/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh
index 3f5c7e547b1..2c9786fcb0f 100755
--- a/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh
+++ b/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017 Hainan Xu
diff --git a/egs/material/s5/local/rnnlm/run_tdnn_lstm_2.sh b/egs/material/s5/local/rnnlm/run_tdnn_lstm_2.sh
index 13cf0bde44c..4e4314ca3e0 100755
--- a/egs/material/s5/local/rnnlm/run_tdnn_lstm_2.sh
+++ b/egs/material/s5/local/rnnlm/run_tdnn_lstm_2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017 Hainan Xu
diff --git a/egs/material/s5/local/score.sh b/egs/material/s5/local/score.sh
index c7da00fba32..a443efb17dc 100755
--- a/egs/material/s5/local/score.sh
+++ b/egs/material/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/material/s5/local/score_segments.sh b/egs/material/s5/local/score_segments.sh
index 064e15ae40d..4fb31037bdd 100755
--- a/egs/material/s5/local/score_segments.sh
+++ b/egs/material/s5/local/score_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -o nounset # Treat unset variables as an error
diff --git a/egs/material/s5/local/score_stm.sh b/egs/material/s5/local/score_stm.sh
index 7e1236ce92e..31f1f31a7d6 100755
--- a/egs/material/s5/local/score_stm.sh
+++ b/egs/material/s5/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# 2018 Vimal Manohar
diff --git a/egs/material/s5/local/score_wer_segments.sh b/egs/material/s5/local/score_wer_segments.sh
index 555ec5056d9..dea325853c1 100755
--- a/egs/material/s5/local/score_wer_segments.sh
+++ b/egs/material/s5/local/score_wer_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
[ -f ./path.sh ] && . ./path.sh
diff --git a/egs/material/s5/local/semisup/chain/decode_test.sh b/egs/material/s5/local/semisup/chain/decode_test.sh
index 3d9a1eda1f5..019e15db01b 100755
--- a/egs/material/s5/local/semisup/chain/decode_test.sh
+++ b/egs/material/s5/local/semisup/chain/decode_test.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (author: Daniel Povey)
# 2018 Mahsa Yarmohammadi
diff --git a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
index 3d3056182ee..b171753ed3e 100755
--- a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
+++ b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2019 Johns Hopkins University (author: Daniel Povey)
# 2017 Vimal Manohar
diff --git a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh
index 37c957a3227..3f5e3b7b97e 100755
--- a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh
+++ b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# 2019 Yiming Wang
diff --git a/egs/material/s5/local/semisup/rnnlm/run_tdnn_lstm.sh b/egs/material/s5/local/semisup/rnnlm/run_tdnn_lstm.sh
index 8fb570ea153..a6cbc79b5da 100755
--- a/egs/material/s5/local/semisup/rnnlm/run_tdnn_lstm.sh
+++ b/egs/material/s5/local/semisup/rnnlm/run_tdnn_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017 Hainan Xu
diff --git a/egs/material/s5/local/semisup/run.sh b/egs/material/s5/local/semisup/run.sh
index 6b22cb1ad36..a0949267c76 100755
--- a/egs/material/s5/local/semisup/run.sh
+++ b/egs/material/s5/local/semisup/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# 2019 Yiming Wang
diff --git a/egs/material/s5/local/train_lms_srilm.sh b/egs/material/s5/local/train_lms_srilm.sh
index 8160b060dc7..eee6a47eb4d 100755
--- a/egs/material/s5/local/train_lms_srilm.sh
+++ b/egs/material/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/material/s5/run.sh b/egs/material/s5/run.sh
index 4ba518f53e0..c35103e3793 100755
--- a/egs/material/s5/run.sh
+++ b/egs/material/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (Jan "Yenda" Trmal)
# 2017-2018 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index 56aa815ffb9..c4e361e3b4f 100755
--- a/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Adapted from gale_arabic s5b.
diff --git a/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a_disc.sh b/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a_disc.sh
index 290c13e223d..3fbc03e774a 100644
--- a/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a_disc.sh
+++ b/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/mgb2_arabic/s5/local/check_tools.sh b/egs/mgb2_arabic/s5/local/check_tools.sh
new file mode 100755
index 00000000000..448a6536946
--- /dev/null
+++ b/egs/mgb2_arabic/s5/local/check_tools.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# check whether bs4 and lxml are installed
+if ! python3 -c "import bs4" 2>/dev/null; then
+ echo "$0: BeautifulSoup4 not installed, you can install it by 'pip install beautifulsoup4' if you prefer to use python to process xml file"
+ exit 1;
+fi
+
+if ! python3 -c "import lxml" 2>/dev/null; then
+ echo "$0: lxml not installed, you can install it by 'pip install lxml' if you prefer to use python to process xml file"
+ exit 1;
+fi
+
+echo "both BeautifulSoup4 and lxml are installed in python"
+exit 0
diff --git a/egs/mgb2_arabic/s5/local/graphgeme_mgb_prep_dict.sh b/egs/mgb2_arabic/s5/local/graphgeme_mgb_prep_dict.sh
index 5a88220a19a..2f7c7a5d592 100755
--- a/egs/mgb2_arabic/s5/local/graphgeme_mgb_prep_dict.sh
+++ b/egs/mgb2_arabic/s5/local/graphgeme_mgb_prep_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
diff --git a/egs/mgb2_arabic/s5/local/mgb_data_prep.sh b/egs/mgb2_arabic/s5/local/mgb_data_prep.sh
index 9d5b3611da8..681894a9e29 100755
--- a/egs/mgb2_arabic/s5/local/mgb_data_prep.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_data_prep.sh
@@ -1,11 +1,11 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# 2016-2019 Vimal Manohar
# 2019 Dongji Gao
-if [ $# -ne 2 ]; then
- echo "Usage: $0 <DB-dir> <mer-sel>"
+if [ $# -ne 3 ]; then
+ echo "Usage: $0 <DB-dir> <mer-sel> <process-xml>"
exit 1;
fi
@@ -23,12 +23,6 @@ for x in $train_dir $dev_dir; do
fi
done
-if [ -z $(which xml) ]; then
- echo "$0: Could not find tool xml"
- echo "$0: Download and install it from xmlstar.sourceforge.net"
- exit 1
-fi
-
find $db_dir/train/wav -type f -name "*.wav" | \
awk -F/ '{print $NF}' | perl -pe 's/\.wav//g' > \
$train_dir/wav_list
@@ -39,11 +33,33 @@ head -500 $train_dir/wav_list > $train_dir/wav_list.short
set -e -o pipefail
xmldir=$db_dir/train/xml/bw
-cat $train_dir/wav_list | while read basename; do
+if [ $process_xml == "python" ]; then
+ echo "using python to process xml file"
+ # check if bs4 and lxml are installed in python
+ local/check_tools.sh
+ # process xml file using python
+ cat $train_dir/wav_list | while read basename; do
[ ! -e $xmldir/$basename.xml ] && echo "Missing $xmldir/$basename.xml" && exit 1
- xml sel -t -m '//segments[@annotation_id="transcript_align"]' -m "segment" -n -v "concat(@who,' ',@starttime,' ',@endtime,' ',@WMER,' ')" -m "element" -v "concat(text(),' ')" $xmldir/$basename.xml | local/add_to_datadir.py $basename $train_dir $mer
- echo $basename $wavDir/$basename.wav >> $train_dir/wav.scp
-done
+ local/process_xml.py $xmldir/$basename.xml - | local/add_to_datadir.py $basename $train_dir $mer
+ done
+elif [ $process_xml == 'xml' ]; then
+ # check if xml binary exsits
+ if command -v xml >/dev/null 2>/dev/null; then
+ echo "using xml"
+ cat $train_dir/wav_list | while read basename; do
+ [ ! -e $xmldir/$basename.xml ] && echo "Missing $xmldir/$basename.xml" && exit 1
+ xml sel -t -m '//segments[@annotation_id="transcript_align"]' -m "segment" -n -v "concat(@who,' ',@starttime,' ',@endtime,' ',@WMER,' ')" -m "element" -v "concat(text(),' ')" $xmldir/$basename.xml | local/add_to_datadir.py $basename $train_dir $mer
+ echo $basename $wavDir/$basename.wav >> $train_dir/wav.scp
+ done
+ else
+ echo "xml not found, you may use python by '--process-xml python'"
+ exit 1;
+ fi
+else
+ # invalid option
+ echo "$0: invalid option for --process-xml, choose from 'xml' or 'python'"
+ exit 1;
+fi
for x in text segments; do
cp $db_dir/dev/${x}.all $dev_dir/${x}
diff --git a/egs/mgb2_arabic/s5/local/mgb_format_data.sh b/egs/mgb2_arabic/s5/local/mgb_format_data.sh
index 0fc24c15add..91dd114938a 100755
--- a/egs/mgb2_arabic/s5/local/mgb_format_data.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
diff --git a/egs/mgb2_arabic/s5/local/mgb_prep_full_data.sh b/egs/mgb2_arabic/s5/local/mgb_prep_full_data.sh
index 30e702c6841..40b464e7a37 100755
--- a/egs/mgb2_arabic/s5/local/mgb_prep_full_data.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_prep_full_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# 2016-2019 Vimal Manohar
diff --git a/egs/mgb2_arabic/s5/local/mgb_prep_original_data.sh b/egs/mgb2_arabic/s5/local/mgb_prep_original_data.sh
index 6edb5ac946d..f3c01c7a57e 100755
--- a/egs/mgb2_arabic/s5/local/mgb_prep_original_data.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_prep_original_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
diff --git a/egs/mgb2_arabic/s5/local/mgb_train_lms.sh b/egs/mgb2_arabic/s5/local/mgb_train_lms.sh
index e49055b478d..7473c92a6ce 100755
--- a/egs/mgb2_arabic/s5/local/mgb_train_lms.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# To be run from one directory above this script.
diff --git a/egs/mgb2_arabic/s5/local/mgb_train_lms_extra.sh b/egs/mgb2_arabic/s5/local/mgb_train_lms_extra.sh
index c29b6e83764..88b618671a4 100755
--- a/egs/mgb2_arabic/s5/local/mgb_train_lms_extra.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_train_lms_extra.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# To be run from one directory above this script.
diff --git a/egs/mgb2_arabic/s5/local/mgb_train_lms_extra_pocolm.sh b/egs/mgb2_arabic/s5/local/mgb_train_lms_extra_pocolm.sh
index b9f82012add..1631985764b 100755
--- a/egs/mgb2_arabic/s5/local/mgb_train_lms_extra_pocolm.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_train_lms_extra_pocolm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (author: Daniel Povey)
# 2017 Vimal Manohar
diff --git a/egs/mgb2_arabic/s5/local/nnet3/run_ivector_common.sh b/egs/mgb2_arabic/s5/local/nnet3/run_ivector_common.sh
index ae2edc27a91..ecfcb780d7c 100755
--- a/egs/mgb2_arabic/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/mgb2_arabic/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/mgb2_arabic/s5/local/process_xml.py b/egs/mgb2_arabic/s5/local/process_xml.py
new file mode 100755
index 00000000000..3c6eed452ac
--- /dev/null
+++ b/egs/mgb2_arabic/s5/local/process_xml.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+from bs4 import BeautifulSoup
+import sys
+import argparse
+
+def get_args():
+ parser = argparse.ArgumentParser(description="""This script process xml file.""")
+ parser.add_argument("xml", type=str, help="""Input xml file""")
+ parser.add_argument("output", type=str, help="""output text file""")
+ args = parser.parse_args()
+ return args
+
+def process_xml(xml_handle, output_handle):
+ soup = BeautifulSoup(xml_handle, "xml")
+ for segment in soup.find_all("segment"):
+ who = segment["who"]
+ starttime = segment["starttime"]
+ endtime = segment["endtime"]
+ WMER = segment["WMER"]
+ text = " ".join([element.string for element in segment.find_all("element") if element.string != None])
+ output_handle.write("{} {} {} {} {}\n".format(who, starttime, endtime, WMER, text))
+ xml_handle.close()
+ output_handle.close()
+
+def main():
+ args = get_args()
+
+ xml_handle = open(args.xml, 'r')
+ output_handle = sys.stdout if args.output == '-' else open(args.output, 'w')
+
+ process_xml(xml_handle, output_handle)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh b/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh
index 559d20046dd..ac5f0cb9009 100755
--- a/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh
+++ b/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/mgb2_arabic/s5/local/score.sh b/egs/mgb2_arabic/s5/local/score.sh
index 08b67050c01..4531c8f3675 100755
--- a/egs/mgb2_arabic/s5/local/score.sh
+++ b/egs/mgb2_arabic/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/mgb2_arabic/s5/local/score_combine.sh b/egs/mgb2_arabic/s5/local/score_combine.sh
index 576962c7442..1c7796e16ea 100755
--- a/egs/mgb2_arabic/s5/local/score_combine.sh
+++ b/egs/mgb2_arabic/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal
diff --git a/egs/mgb2_arabic/s5/local/score_mbr.sh b/egs/mgb2_arabic/s5/local/score_mbr.sh
index 4052512f726..a5ca96a67d6 100755
--- a/egs/mgb2_arabic/s5/local/score_mbr.sh
+++ b/egs/mgb2_arabic/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/mgb2_arabic/s5/local/score_sclite.sh b/egs/mgb2_arabic/s5/local/score_sclite.sh
index 2c8be28a568..de41053a0c9 100755
--- a/egs/mgb2_arabic/s5/local/score_sclite.sh
+++ b/egs/mgb2_arabic/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/mgb2_arabic/s5/run.sh b/egs/mgb2_arabic/s5/run.sh
index 334aef1bf30..e4192c067b0 100755
--- a/egs/mgb2_arabic/s5/run.sh
+++ b/egs/mgb2_arabic/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# 2017-19 Vimal Manohar
@@ -6,6 +6,9 @@
stage=-1
+# preference on how to process xml file [python, xml]
+process_xml="python"
+
. ./cmd.sh
if [ -f ./path.sh ]; then . ./path.sh; fi
. utils/parse_options.sh
@@ -50,7 +53,7 @@ fi
if [ $stage -le 1 ]; then
#DATA PREPARATION
echo "Preparing training data"
- local/mgb_data_prep.sh DB $mer
+ local/mgb_data_prep.sh DB $mer $process_xml
fi
if [ $stage -le 2 ]; then
diff --git a/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh
index 6300511e817..1cc68f43a33 100644
--- a/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/mgb5/s5/local/nnet3/run_ivector_common.sh b/egs/mgb5/s5/local/nnet3/run_ivector_common.sh
index b909ed04cde..ddec4419a61 100644
--- a/egs/mgb5/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/mgb5/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/mgb5/s5/local/prepare_data.sh b/egs/mgb5/s5/local/prepare_data.sh
index 36cb4d8fa3f..7ab937f7b45 100755
--- a/egs/mgb5/s5/local/prepare_data.sh
+++ b/egs/mgb5/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 QCRI (Author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/mgb5/s5/local/prepare_lm.sh b/egs/mgb5/s5/local/prepare_lm.sh
index 02fb59aba87..5b47360a730 100755
--- a/egs/mgb5/s5/local/prepare_lm.sh
+++ b/egs/mgb5/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 QCRI (Author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/mgb5/s5/local/score.sh b/egs/mgb5/s5/local/score.sh
index 9988c941441..3ddee8e4b12 100755
--- a/egs/mgb5/s5/local/score.sh
+++ b/egs/mgb5/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
diff --git a/egs/mgb5/s5/local/train_lms_srilm.sh b/egs/mgb5/s5/local/train_lms_srilm.sh
index 6af13921511..ed3200eb103 100755
--- a/egs/mgb5/s5/local/train_lms_srilm.sh
+++ b/egs/mgb5/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 2019 QCRI (Ahmed Ali)
diff --git a/egs/mgb5/s5/run.sh b/egs/mgb5/s5/run.sh
index 6fc21629f0f..27c4f751eae 100755
--- a/egs/mgb5/s5/run.sh
+++ b/egs/mgb5/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 QCRI (Author:Ahmed Ali)
# Apache 2.0
diff --git a/egs/mini_librispeech/s5/RESULTS b/egs/mini_librispeech/s5/RESULTS
index 0b747120416..94e95e97f99 100755
--- a/egs/mini_librispeech/s5/RESULTS
+++ b/egs/mini_librispeech/s5/RESULTS
@@ -20,3 +20,7 @@ exit 0
%WER 18.58 [ 3742 / 20138, 366 ins, 763 del, 2613 sub ] exp/chain/tdnn1a_sp/decode_tgsmall_dev_clean_2/wer_10_0.0
%WER 13.35 [ 2689 / 20138, 318 ins, 491 del, 1880 sub ] exp/chain/tdnn1a_sp/decode_tglarge_dev_clean_2/wer_9_0.5
+
+# Results with chain2 recipe. Results are w/o final model combination
+%WER 21.38 [ 4305 / 20138, 449 ins, 740 del, 3116 sub ] exp/chain2/tdnn1a_sp/decode_tgsmall_dev_clean_2//wer_10_0.0
+%WER 15.64 [ 3150 / 20138, 395 ins, 584 del, 2171 sub ] exp/chain2/tdnn1a_sp/decode_tglarge_dev_clean_2//wer_11_0.0
diff --git a/egs/mini_librispeech/s5/local/chain/compare_wer.sh b/egs/mini_librispeech/s5/local/chain/compare_wer.sh
index 8ee5db2326a..411d2691bb9 100755
--- a/egs/mini_librispeech/s5/local/chain/compare_wer.sh
+++ b/egs/mini_librispeech/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
index c8f2503b578..636de409f2c 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_cnn_tdnn_1a.sh is modified from run_tdnn_1h.sh, but adding CNN layers
# near the beginning.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1b.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
index 9be405a5e1a..6bcb4f2e9aa 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1b is as 1a but adding SpecAugment and removing dropout (which, in
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh
index da16297c9dd..20ee39095dd 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a basic TDNN experiment.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
index 3d0c2d63902..ab0c30f0da6 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is as 1a but increasing epochs from 4 to 10 and adding the option
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
index 081af8fe2f8..4dbff118902 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1c is as 1b but replacing the renorm with batchnorm components
# (i.e. NormalizeComponent with BatchNormComponent).
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c_discriminative.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c_discriminative.sh
index 1aa519ccb9d..f2c24443744 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c_discriminative.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -o pipefail
set -e
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
index 04df38d4da3..b8944be91c3 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1d is as 1c but adding two non-splicing layers towards the beginning
# of the network.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh
index cdf9bb584f4..13aaf0c12c1 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1e is as 1d but instead of the --proportional-shrink option, using
# the newly added xconfig-layer-specific 'l2-regularize' options.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh
index d1385ff2be5..8d2854247c2 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1f is as 1e but a smaller model with various tuning changes, the most
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh
index ad51780e191..646f0875e61 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1g is as 1f but adding dropout (well, something like dropout-- the mask
# is shared across time and it's continuous rather than zero-one), increasing
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh
index dbfe5c5a07a..07f6e25473a 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1g20 is as 1g but adding the option "--constrained false" to --egs.opts.
# This is the new 'unconstrained egs' code where it uses the e2e examples.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh
index cc4123e2755..5097007e56a 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1h is as 1g but a re-tuned model based on resnet-style TDNN-F layers with
# bypass connections. Below, 1h2 and 1h3 are just reruns of 1h with different
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1i.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1i.sh
index 502c225fa87..1577d7dc10a 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1i.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1i.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1i is as 1h but adding SpecAugment.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1j.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1j.sh
index 7a6604f9773..824de7d7341 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1j.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1j.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1j is as 1i but replaces the LDA layer at the input of the
# network with delta and delta-delta features.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh
index 652f0175558..b4a9ebe1418 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1k is like 1j, while it introduces 'apply-cmvn-online' that does
# cmn normalization both for i-extractor and TDNN input.
diff --git a/egs/mini_librispeech/s5/local/chain2/data_prep_common.sh b/egs/mini_librispeech/s5/local/chain2/data_prep_common.sh
new file mode 100755
index 00000000000..21b36cce421
--- /dev/null
+++ b/egs/mini_librispeech/s5/local/chain2/data_prep_common.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# Copyright 2019 Daniel Povey
+# 2019 Srikanth Madikeri (Idiap Research Institute)
+
+set -euo pipefail
+
+# This script is called from local/chain/tuning/run_tdnn_2a.sh and
+# similar scripts. It contains the common feature preparation and
+# lattice-alignment preparation parts of the chaina training.
+# See those scripts for examples of usage.
+
+stage=0
+train_set=train_clean_5
+test_sets="dev_clean_2"
+gmm=tri3b
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+gmm_dir=exp/${gmm}
+ali_dir=exp/${gmm}_ali_${train_set}_sp
+
+for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do
+ if [ ! -f $f ]; then
+ echo "$0: expected file $f to exist"
+ exit 1
+ fi
+done
+
+# Our default data augmentation method is 3-way speed augmentation followed by
+# volume perturbation. We are looking into better ways of doing this,
+# e.g. involving noise and reverberation.
+
+if [ $stage -le 1 ]; then
+ # Although the nnet will be trained by high resolution data, we still have to
+ # perturb the normal data to get the alignment. _sp stands for speed-perturbed
+ echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+ echo "$0: making MFCC features for low-resolution speed-perturbed data"
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/${train_set}_sp || exit 1;
+ steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1;
+ utils/fix_data_dir.sh data/${train_set}_sp
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
+ data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1
+fi
+
+if [ $stage -le 3 ]; then
+ # Create high-resolution MFCC features (with 40 cepstra instead of 13).
+ # this shows how you can split across multiple file-systems.
+ echo "$0: creating high-resolution MFCC features"
+ mfccdir=data/${train_set}_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/fs0{1,2}/$USER/kaldi-data/mfcc/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1;
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+fi
+
+
+exit 0
diff --git a/egs/mini_librispeech/s5/local/chain2/run_tdnn.sh b/egs/mini_librispeech/s5/local/chain2/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/mini_librispeech/s5/local/chain2/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..2311fc0699e
--- /dev/null
+++ b/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+
+# Copyright 2019 Srikanth Madikeri (Idiap Research Institute)
+#
+# This script is a modification of local/chain/run_tdnn.sh adapted to the chain2 recipes.
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=10
+train_set=train_clean_5
+test_sets=dev_clean_2
+gmm=tri3b
+srand=0
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=2c # affix for the TDNN directory name
+tree_affix=
+train_stage=-10
+get_egs_stage=-10
+
+
+# training chunk-options
+chunk_width=140
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+xent_regularize=0.1
+bottom_subsampling_factor=1 # I'll set this to 3 later, 1 is for compatibility with a broken run.
+frame_subsampling_factor=3
+langs="default" # list of language names
+
+# The amount of extra left/right context we put in the egs. Note: this could
+# easily be zero, since we're not using a recurrent topology, but we put in a
+# little extra context so that we have more room to play with the configuration
+# without re-dumping egs.
+egs_extra_left_context=5
+egs_extra_right_context=5
+
+# The number of chunks (of length: see $chunk_width above) that we group
+# together for each "speaker" (actually: pseudo-speaker, since we may have
+# to group multiple speaker together in some cases).
+chunks_per_group=4
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+# if ! cuda-compiled; then
+# cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 11 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 12 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those. The num-leaves is always somewhat less than the num-leaves from
+ # the GMM baseline.
+ # This will be a two-level tree (with the smaller number of leaves specified
+ # by the '--num-clusters' option); this is needed by the adaptation framework
+ # search below for 'tree.map'
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh \
+ --frame-subsampling-factor ${frame_subsampling_factor} \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" 3500 ${lores_train_data_dir} \
+ $lang $ali_dir $tree_dir
+fi
+
+
+# $dir/configs will contain xconfig and config files for the initial
+# models. It's a scratch space used by this script but not by
+# scripts called from here.
+mkdir -p $dir/configs/
+# $dir/init will contain the initial models
+mkdir -p $dir/init/
+
+learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+
+if [ $stage -le 14 ]; then
+
+ # Note: we'll use --bottom-subsampling-factor=3, so all time-strides for the
+ # top network should be interpreted at the 30ms frame subsampling rate.
+ num_leaves=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+
+ echo "$0: creating top model"
+ cat < $dir/configs/default.xconfig
+ input name=input dim=40
+ # the first splicing is moved before the lda layer, so no splicing here
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2) affine-transform-file=$dir/configs/lda.mat
+ relu-renorm-layer name=tdnn1 dim=512 input=Append(-2,-1,0,1,2)
+ relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+ relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1)
+ relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0)
+ relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5
+ output-layer name=output include-log-softmax=false dim=$num_leaves max-change=1.5
+ output-layer name=output-default input=prefinal-chain include-log-softmax=false dim=$num_leaves max-change=1.5
+ relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5
+ output-layer name=output-xent dim=$num_leaves learning-rate-factor=$learning_rate_factor max-change=1.5
+ output-layer name=output-default-xent input=prefinal-xent dim=$num_leaves learning-rate-factor=$learning_rate_factor max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/default.xconfig --config-dir $dir/configs/
+ if [ ! -f $dir/init/default_trans.mdl ]; then # checking this because it may have been copied in a previous run of the same script
+ copy-transition-model $tree_dir/final.mdl $dir/init/default_trans.mdl || exit 1 &
+ else
+ echo "Keeping the old $dir/init/default_trans.mdl as it already exists."
+ fi
+fi
+wait;
+
+init_info=$dir/init/info.txt
+if [ $stage -le 15 ]; then
+
+ if [ ! -f $dir/configs/ref.raw ]; then
+ echo "Expected $dir/configs/ref.raw to exist"
+ exit
+ fi
+
+ nnet3-info $dir/configs/ref.raw > $dir/configs/temp.info
+ model_left_context=`fgrep 'left-context' $dir/configs/temp.info | awk '{print $2}'`
+ model_right_context=`fgrep 'right-context' $dir/configs/temp.info | awk '{print $2}'`
+ cat >$init_info <)
# License: Apache 2.0
diff --git a/egs/mini_librispeech/s5/local/kws/create_hitlist.sh b/egs/mini_librispeech/s5/local/kws/create_hitlist.sh
index be06a3b9312..8e9ac40090d 100755
--- a/egs/mini_librispeech/s5/local/kws/create_hitlist.sh
+++ b/egs/mini_librispeech/s5/local/kws/create_hitlist.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2018 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/mini_librispeech/s5/local/kws/make_L_align.sh b/egs/mini_librispeech/s5/local/kws/make_L_align.sh
index 72a1e9e3f4c..6c59b779fc7 100755
--- a/egs/mini_librispeech/s5/local/kws/make_L_align.sh
+++ b/egs/mini_librispeech/s5/local/kws/make_L_align.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2018 Johns Hopkins University (authors: Guoguo Chen, Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/mini_librispeech/s5/local/kws/run_kws.sh b/egs/mini_librispeech/s5/local/kws/run_kws.sh
index 8e7b56f0082..82be01ee14e 100755
--- a/egs/mini_librispeech/s5/local/kws/run_kws.sh
+++ b/egs/mini_librispeech/s5/local/kws/run_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2018, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/mini_librispeech/s5/local/kws/score.sh b/egs/mini_librispeech/s5/local/kws/score.sh
index b056e150e83..6fea8adadb0 100755
--- a/egs/mini_librispeech/s5/local/kws/score.sh
+++ b/egs/mini_librispeech/s5/local/kws/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2018 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/mini_librispeech/s5/local/kws/search.sh b/egs/mini_librispeech/s5/local/kws/search.sh
index 1c69b0da556..73696e55403 100755
--- a/egs/mini_librispeech/s5/local/kws/search.sh
+++ b/egs/mini_librispeech/s5/local/kws/search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2018 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# License: Apache 2.0
diff --git a/egs/mini_librispeech/s5/local/lookahead/run_lookahead.sh b/egs/mini_librispeech/s5/local/lookahead/run_lookahead.sh
new file mode 100755
index 00000000000..7afe9cc67be
--- /dev/null
+++ b/egs/mini_librispeech/s5/local/lookahead/run_lookahead.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+. ./path.sh
+
+# Example script for lookahead composition
+
+lm=tgmed
+am=exp/chain_online_cmn/tdnn1k_sp
+testset=dev_clean_2
+
+# %WER 10.32 [ 2078 / 20138, 201 ins, 275 del, 1602 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_base/wer_10_0.5
+# %WER 10.29 [ 2073 / 20138, 200 ins, 272 del, 1601 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_static/wer_10_0.5
+# %WER 10.25 [ 2064 / 20138, 192 ins, 277 del, 1595 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead/wer_10_0.5
+# %WER 10.24 [ 2063 / 20138, 187 ins, 290 del, 1586 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_arpa/wer_10_0.5
+# %WER 10.29 [ 2072 / 20138, 228 ins, 242 del, 1602 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_arpa_fast/wer_9_0.5
+
+# Speed
+#
+# base 0.29 xRT
+# static 0.31 xRT
+# lookahead 0.77 xRT
+# arpa 1.03 xRT
+# arpa_fast 0.31 xRT
+
+# Graph size
+#
+# Base 461 Mb
+# Static 587 Mb
+# Lookahead 44 Mb HCL + 77 Mb Grammar
+# Lookahead + OpenGrm 44 Mb HCL + 42 Mb Grammar
+
+if [ ! -f "${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so" ]; then
+ echo "Missing ${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so"
+ echo "Make sure you compiled openfst with lookahead support. Run make in ${KALDI_ROOT}/tools after git pull."
+ exit 1
+fi
+if [ ! -f "${KALDI_ROOT}/tools/openfst/bin/ngramread" ]; then
+ echo "You appear to not have OpenGRM tools installed. Missing ${KALDI_ROOT}/tools/openfst/bin/ngramread"
+ echo "cd to $KALDI_ROOT/tools and run extras/install_opengrm.sh."
+ exit 1
+fi
+export LD_LIBRARY_PATH=${KALDI_ROOT}/tools/openfst/lib/fst
+
+# Baseline
+utils/format_lm.sh data/lang data/local/lm/lm_${lm}.arpa.gz \
+ data/local/dict/lexicon.txt data/lang_test_${lm}_base
+
+utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov \
+ data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead_base
+
+steps/nnet3/decode.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_base data/${testset}_hires ${am}/decode_${testset}_lookahead_base
+
+utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --remove-oov --compose-graph \
+ data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead
+
+# Decode with statically composed lookahead graph
+steps/nnet3/decode.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead_static
+
+# Decode with runtime composition
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead
+
+# Compile arpa graph
+utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --compose-graph \
+ data/lang_test_${lm}_base ${am} data/local/lm/lm_tgmed.arpa.gz ${am}/graph_${lm}_lookahead_arpa
+
+# Decode with runtime composition
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa
+
+# Decode with runtime composition and tuned beams
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --beam 12.0 --max-active 3000 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa_fast
diff --git a/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh b/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh
index 095e85cc338..4888de1f159 100755
--- a/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh
index f44b0cb0284..89d2a9f6e57 100755
--- a/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh
index c2f90df4b5c..817f5312a40 100755
--- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a basic TDNN+LSTM nnet3 experiment.
diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh
index 2b3c2844972..49618686842 100755
--- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is like 1a, but adding dropout. It's definitely helpful,
# and you can see in the objf values that the train-test difference
diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh
index 5118cb0f8bd..9f5c1cd5e03 100755
--- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1c is like 1b, but changing renorm to batchnorm and adding l2 regularization.
diff --git a/egs/mini_librispeech/s5/local/score.sh b/egs/mini_librispeech/s5/local/score.sh
index c812199fc98..cb5bbb7277b 100755
--- a/egs/mini_librispeech/s5/local/score.sh
+++ b/egs/mini_librispeech/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2014 Guoguo Chen
# Apache 2.0
diff --git a/egs/mini_librispeech/s5/local/subset_dataset.sh b/egs/mini_librispeech/s5/local/subset_dataset.sh
index 050128247a4..f8936b64c97 100755
--- a/egs/mini_librispeech/s5/local/subset_dataset.sh
+++ b/egs/mini_librispeech/s5/local/subset_dataset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Luminar Technologies, Inc. (author: Daniel Galvez)
# Apache 2.0
diff --git a/egs/mini_librispeech/s5/run.sh b/egs/mini_librispeech/s5/run.sh
index 2a13668e2c2..257d88d4139 100755
--- a/egs/mini_librispeech/s5/run.sh
+++ b/egs/mini_librispeech/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Change this location to somewhere where you want to put the data.
data=./corpus/
@@ -196,7 +196,7 @@ fi
# Train a chain model
if [ $stage -le 9 ]; then
- local/chain/run_tdnn.sh
+ local/chain2/run_tdnn.sh
fi
# local/grammar/simple_demo.sh
diff --git a/egs/mobvoi/README.txt b/egs/mobvoi/README.txt
new file mode 100644
index 00000000000..a3400dd7f65
--- /dev/null
+++ b/egs/mobvoi/README.txt
@@ -0,0 +1,15 @@
+
+ The Mobvoi dataset is a ~67-hour wake word corpus
+ in Chinese covering 523 speakers. It is currently not publicly available.
+ The wake word is "Hi Xiaowen" (in Pinyin).
+ Each speaker's collection includes positive utterances and negative utterances
+ recorded with different speaker-to-microphone distance and different
+ signal-to-noise (SNR) ratio where noises are from typical home environments.
+ The dataset is provided by Mobvoi Inc.
+
+ The recipe is in v1/
+
+ The E2E LF-MMI recipe does not require any prior alignments for training
+ LF-MMI, making the alignment more flexible during training. It can be optionally
+ followed by a regular LF-MMI training to further improve the performance.
+
diff --git a/egs/mobvoi/v1/cmd.sh b/egs/mobvoi/v1/cmd.sh
new file mode 100644
index 00000000000..fc5d4aa9e1c
--- /dev/null
+++ b/egs/mobvoi/v1/cmd.sh
@@ -0,0 +1,24 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl"
+export decode_cmd="queue.pl --mem 4G"
+# the use of cuda_cmd is deprecated, used only in 'nnet1',
+export cuda_cmd="queue.pl --gpu 1"
+
+if [[ "$(hostname -f)" == *.fit.vutbr.cz ]]; then
+ queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+ export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+ export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+ export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
+
diff --git a/egs/mobvoi/v1/conf/mfcc.conf b/egs/mobvoi/v1/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/mobvoi/v1/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/mobvoi/v1/conf/mfcc_hires.conf b/egs/mobvoi/v1/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..d96b86ddfcb
--- /dev/null
+++ b/egs/mobvoi/v1/conf/mfcc_hires.conf
@@ -0,0 +1,9 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/mobvoi/v1/conf/online_cmvn.conf b/egs/mobvoi/v1/conf/online_cmvn.conf
new file mode 100644
index 00000000000..a173510e433
--- /dev/null
+++ b/egs/mobvoi/v1/conf/online_cmvn.conf
@@ -0,0 +1,3 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
+--norm-means=true
+--norm-vars=false
diff --git a/egs/mobvoi/v1/local/add_prefix_to_scp.py b/egs/mobvoi/v1/local/add_prefix_to_scp.py
new file mode 120000
index 00000000000..b6750c78e16
--- /dev/null
+++ b/egs/mobvoi/v1/local/add_prefix_to_scp.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/add_prefix_to_scp.py
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/chain/build_tree.sh b/egs/mobvoi/v1/local/chain/build_tree.sh
new file mode 100755
index 00000000000..452d844401d
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/build_tree.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey).
+# 2019 Yiming Wang
+# Apache 2.0.
+
+
+# This script is modified from steps/nnet3/chain/build_tree.sh, but only contains
+# trivial mono phone tree building without any states tying.
+
+
+# Begin configuration section.
+cmd=run.pl
+frame_subsampling_factor=1
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: $0 "
+ echo " e.g.: $0 --frame-subsampling-factor 3 \\"
+ echo " data/train data/lang_chain exp/mono_ali_train_sp exp/chain/tree"
+ echo "Main options (for others, see top of script file)"
+ echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
+ echo " --frame-subsampling-factor # Factor (e.g. 3) controlling frame subsampling"
+ echo " # at the neural net output, so the frame rate at"
+ echo " # the output is less than at the input."
+ exit 1;
+fi
+
+data=$1
+lang=$2
+alidir=$3
+dir=$4
+
+for f in $data/feats.scp $lang/phones.txt $alidir/final.mdl $alidir/ali.1.gz; do
+ [ ! -f $f ] && echo "train_sat.sh: no such file $f" && exit 1;
+done
+
+oov=`cat $lang/oov.int`
+nj=`cat $alidir/num_jobs` || exit 1;
+silphonelist=`cat $lang/phones/silence.csl`
+ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
+splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options.
+cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null`
+delta_opts=`cat $alidir/delta_opts 2>/dev/null`
+
+mkdir -p $dir/log
+cp $alidir/splice_opts $dir 2>/dev/null # frame-splicing options.
+cp $alidir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option.
+cp $alidir/delta_opts $dir 2>/dev/null # delta option.
+cp $alidir/ali.1.gz $dir 2>/dev/null # to pass the file checking later during training
+
+utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1;
+cp $lang/phones.txt $dir || exit 1;
+
+echo $nj >$dir/num_jobs
+if [ -f $alidir/per_utt ]; then
+ sdata=$data/split${nj}utt
+ utils/split_data.sh --per-utt $data $nj
+else
+ sdata=$data/split$nj
+ utils/split_data.sh $data $nj
+fi
+
+# Set up features.
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+## Set up speaker-independent features.
+case $feat_type in
+ delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |";;
+ lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+ cp $alidir/final.mat $dir
+ cp $alidir/full.mat $dir 2>/dev/null
+ ;;
+ *) echo "$0: invalid feature type $feat_type" && exit 1;
+esac
+
+# Add fMLLR transforms if available
+if [ -f $alidir/trans.1 ]; then
+ echo "$0: Using transforms from $alidir"
+ feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
+fi
+
+# Do subsampling of feats, if needed
+if [ $frame_subsampling_factor -gt 1 ]; then
+ feats="$feats subsample-feats --n=$frame_subsampling_factor ark:- ark:- |"
+fi
+
+echo "$0: Initializing monophone model (for alignment conversion, in case topology changed)"
+
+[ ! -f $lang/phones/sets.int ] && exit 1;
+shared_phones_opt="--shared-phones=$lang/phones/sets.int"
+# get feature dimension
+example_feats="`echo $feats | sed s/JOB/1/g`";
+if ! feat_dim=$(feat-to-dim "$example_feats" - 2>/dev/null) || [ -z $feat_dim ]; then
+ feat-to-dim "$example_feats" - # to see the error message.
+ echo "error getting feature dimension"
+ exit 1;
+fi
+$cmd JOB=1 $dir/log/init_mono.log \
+ gmm-init-mono $shared_phones_opt "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" $lang/topo $feat_dim \
+ $dir/mono.mdl $dir/mono.tree || exit 1;
+
+cp $dir/mono.mdl $dir/final.mdl
+cp $dir/mono.tree $dir/tree
+
+echo $0: Done building tree
diff --git a/egs/mobvoi/v1/local/chain/run_e2e_tdnn.sh b/egs/mobvoi/v1/local/chain/run_e2e_tdnn.sh
new file mode 120000
index 00000000000..891eec02423
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/run_e2e_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_e2e_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/chain/run_tdnn.sh b/egs/mobvoi/v1/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/chain/run_tdnn_e2eali.sh b/egs/mobvoi/v1/local/chain/run_tdnn_e2eali.sh
new file mode 120000
index 00000000000..38f0bd07e6c
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/run_tdnn_e2eali.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_e2eali_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/chain/tuning/run_e2e_tdnn_1a.sh b/egs/mobvoi/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
new file mode 100755
index 00000000000..99ce93ff28e
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
@@ -0,0 +1,239 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+set -e
+
+# configs for 'chain'
+stage=0
+train_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+minibatch_size=150=128,64/300=100,64,32/600=50,32,16/1200=16,8
+common_egs_dir=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_combined_spe2e
+test_sets="dev eval"
+export LC_ALL=en_US.UTF-8
+wake_word="嗨小问"
+export LC_ALL=C
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ mkdir -p $tree_dir
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat < $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word $id_word
+2 3 $id_sil $id_sil
+1 4 $id_freetext $id_freetext
+4 5 $id_sil $id_sil
+3 1.9
+5 0.7
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+ steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$train_cmd" \
+ data/${train_set}_hires $lang $tree_dir
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat < $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+
+ relu-batchnorm-dropout-layer name=tdnn1 input=Append(-2,-1,0,1,2) $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 3 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \
+ --cmd "$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.frame-subsampling-factor=3 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00003 \
+ --trainer.optimization.final-effective-lrate 0.000003 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch $minibatch_size \
+ --trainer.optimization.momentum=0.0 \
+ --egs.dir "$common_egs_dir" \
+ --egs.opts "--num-utts-subset 300 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --feat-dir data/${train_set}_hires \
+ --tree-dir $tree_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word_cost in 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id=`cat $lang_decode/words.txt | grep $wake_word | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat < $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.7
+4 0 $sil_id $sil_id
+1 2 $id $id $wake_word_cost
+2 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 200 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_cost$wake_word_cost || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ for data in $test_sets; do
+ echo "Results on $data set:"
+ cat ${dir}_online/decode_${data}_cost*/scoring_kaldi/all_results
+ done
+fi
diff --git a/egs/mobvoi/v1/local/chain/tuning/run_tdnn_1a.sh b/egs/mobvoi/v1/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..0b417f6541e
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,292 @@
+#!/bin/bash
+#
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+# Apache 2.0
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+nj=30
+gmm=mono
+train_stage=-5 # starting from -5 to skip phone-lm estimation
+get_egs_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+chunk_width=140,100,160
+common_egs_dir=
+reporting_email=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter
+combined_train_set=train_shorter_sp_combined
+test_sets="dev eval"
+aug_prefix="rev1 noise music babble"
+export LC_ALL=en_US.UTF-8
+wake_word="嗨小问"
+export LC_ALL=C
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 2 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom)
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 3 ]; then
+ local/copy_lat_dir.sh --nj 75 --cmd "$train_cmd" --utt-prefixes "$aug_prefix" \
+ $combined_train_data_dir $lat_dir $combined_lat_dir
+fi
+
+if [ $stage -le 4 ]; then
+ # Build a tree using our new topology. We know we have alignments from
+ # steps/align_fmllr.sh, so use those.
+ # The num-leaves is always somewhat less than the num-leaves from the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ local/chain/build_tree.sh \
+ --frame-subsampling-factor 3 \
+ --cmd "$train_cmd" ${lores_train_data_dir} \
+ $lang $ali_dir $tree_dir
+
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+1 2 $id_word $id_word
+2 3 $id_sil $id_sil
+1 4 $id_freetext $id_freetext
+4 5 $id_sil $id_sil
+3 1.9
+5 0.7
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 6 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ cp $tree_dir/phone_lm.fst $dir/phone_lm.fst
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00005 \
+ --trainer.optimization.final-effective-lrate 0.000005 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.0 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=0 \
+ --egs.chunk-right-context=0 \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir $combined_train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$combined_lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word_cost in -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id=`cat $lang_decode/words.txt | grep $wake_word | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.7
+4 0 $sil_id $sil_id
+1 2 $id $id $wake_word_cost
+2 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 200 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_cost$wake_word_cost || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ for data in $test_sets; do
+ echo "Results on $data set:"
+ cat ${dir}_online/decode_${data}_cost*/scoring_kaldi/all_results
+ done
+fi
diff --git a/egs/mobvoi/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh b/egs/mobvoi/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
new file mode 100755
index 00000000000..eb806b76850
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
@@ -0,0 +1,282 @@
+#!/bin/bash
+#
+# Copyright 2019-2020 Daniel Povey
+# 2019-2020 Yiming Wang
+# Apache 2.0
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+nj=30
+e2echain_model_dir=exp/chain/e2e_tdnn_1a
+train_stage=-5 # starting from -5 to skip phone-lm estimation
+get_egs_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+chunk_width=140,100,160
+common_egs_dir=
+reporting_email=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_sp_combined
+test_sets="dev eval"
+export LC_ALL=en_US.UTF-8
+wake_word="嗨小问"
+export LC_ALL=C
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 2 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom)
+ # use the same num-jobs as the alignments
+ steps/nnet3/align_lats.sh --nj 75 --cmd "$train_cmd" \
+ --acoustic-scale 1.0 \
+ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \
+ $train_data_dir data/lang $e2echain_model_dir $lat_dir
+ echo "" >$lat_dir/splice_opts
+fi
+
+if [ $stage -le 3 ]; then
+ # Build a tree using our new topology. We know we have alignments from
+ # steps/align_fmllr.sh, so use those.
+ # The num-leaves is always somewhat less than the num-leaves from the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ local/chain/build_tree.sh \
+ --frame-subsampling-factor 3 --cmd "$train_cmd" \
+ $train_data_dir $lang $ali_dir $tree_dir
+
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+1 2 $id_word $id_word
+2 3 $id_sil $id_sil
+1 4 $id_freetext $id_freetext
+4 5 $id_sil $id_sil
+3 1.9
+5 0.7
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 6 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ cp $tree_dir/phone_lm.fst $dir/phone_lm.fst
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.alignment-subsampling-factor=1 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00005 \
+ --trainer.optimization.final-effective-lrate 0.000005 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.0 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=0 \
+ --egs.chunk-right-context=0 \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir $train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word_cost in 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id=`cat $lang_decode/words.txt | grep $wake_word | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.7
+4 0 $sil_id $sil_id
+1 2 $id $id $wake_word_cost
+2 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 200 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_cost$wake_word_cost || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ for data in $test_sets; do
+ echo "Results on $data set:"
+ cat ${dir}_online/decode_${data}_cost*/scoring_kaldi/all_results
+ done
+fi
diff --git a/egs/mobvoi/v1/local/compute_metrics.py b/egs/mobvoi/v1/local/compute_metrics.py
new file mode 120000
index 00000000000..695a2ca5f6d
--- /dev/null
+++ b/egs/mobvoi/v1/local/compute_metrics.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/compute_metrics.py
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/copy_lat_dir.sh b/egs/mobvoi/v1/local/copy_lat_dir.sh
new file mode 120000
index 00000000000..6be684730ad
--- /dev/null
+++ b/egs/mobvoi/v1/local/copy_lat_dir.sh
@@ -0,0 +1 @@
+../../../../scripts/wakeword/copy_lat_dir.sh
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/gen_topo.pl b/egs/mobvoi/v1/local/gen_topo.pl
new file mode 120000
index 00000000000..fd5959cebaf
--- /dev/null
+++ b/egs/mobvoi/v1/local/gen_topo.pl
@@ -0,0 +1 @@
+../../../../scripts/wakeword/gen_topo.pl
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/get_random_subsegments.py b/egs/mobvoi/v1/local/get_random_subsegments.py
new file mode 120000
index 00000000000..24631471ff6
--- /dev/null
+++ b/egs/mobvoi/v1/local/get_random_subsegments.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/get_random_subsegments.py
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/mobvoi_data_download.sh b/egs/mobvoi/v1/local/mobvoi_data_download.sh
new file mode 100755
index 00000000000..f0e7d961be2
--- /dev/null
+++ b/egs/mobvoi/v1/local/mobvoi_data_download.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+# This script loads the Mobvoi dataset.
+[ -f ./path.sh ] && . ./path.sh
+
+dl_dir=data/download
+
+mkdir -p $dl_dir
+
+src_path=/export/fs04/a11/hlyu/wakeup_word_corpra/mobvoi
+
+dataset=ticmini2_dataset_20180607.zip
+if [ -d $dl_dir/$(basename "$dataset" .zip) ]; then
+ echo "Not extracting $(basename "$dataset" .zip) as it is already there."
+else
+ if [ ! -f $dl_dir/$dataset ]; then
+ echo "Downloading $dataset..."
+ cat $src_path/ticmini2_dataset_20180607.z01 $src_path/$dataset > $dl_dir/$dataset
+ fi
+ unzip $dl_dir/$dataset -d $dl_dir
+ rm -f $dl_dir/$dataset 2>/dev/null || true
+ echo "Done extracting $dataset."
+fi
+
+dataset=ticmini2_for_school_20180911.tar.gz
+if [ -d $dl_dir/$(basename "$dataset" .tar.gz) ]; then
+ echo "Not extracting $(basename "$dataset" .tar.gz) as it is already there."
+else
+ echo "Extracting $dataset..."
+ tar -xvzf $src_path/$dataset -C $dl_dir || exit 1;
+ echo "Done extracting $dataset."
+fi
+
+dataset=ticmini2_hixiaowen_adult_20180731.7z
+if [ -d $dl_dir/$(basename "$dataset" .7z) ]; then
+ echo "Not extracting $(basename "$dataset" .7z) as it is already there."
+else
+ echo "Extracting $dataset..."
+ ~/p7zip_16.02/bin/7z x $src_path/$dataset -o$dl_dir|| exit 1;
+ echo "Done extracting $dataset."
+fi
+
+for dataset in train dev eval; do
+ cp $src_path/${dataset}_list $dl_dir/${dataset}_list
+done
+
+exit 0
diff --git a/egs/mobvoi/v1/local/prepare_dict.sh b/egs/mobvoi/v1/local/prepare_dict.sh
new file mode 100755
index 00000000000..96946c83cd7
--- /dev/null
+++ b/egs/mobvoi/v1/local/prepare_dict.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+
+set -e
+dir=data/local/dict
+
+. ./utils/parse_options.sh
+
+mkdir -p $dir
+
+# First get the set of all letters that occur in data/train/text
+echo "hixiaowen" > $dir/nonsilence_phones.txt
+echo "freetext" >> $dir/nonsilence_phones.txt
+
+echo "嗨小问 hixiaowen" > $dir/lexicon.txt
+echo "FREETEXT freetext" >> $dir/lexicon.txt
+echo "<sil> SIL" >> $dir/lexicon.txt
+
+echo SIL > $dir/silence_phones.txt
+
+echo SIL >$dir/optional_silence.txt
+
+echo -n "" >$dir/extra_questions.txt
diff --git a/egs/mobvoi/v1/local/prepare_wav.py b/egs/mobvoi/v1/local/prepare_wav.py
new file mode 100755
index 00000000000..5e42f64ba9f
--- /dev/null
+++ b/egs/mobvoi/v1/local/prepare_wav.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+""" This script prepares the Mobvoi data into kaldi format.
+"""
+
+
+import argparse
+import os
+import sys
+import glob
+
+def main():
+ parser = argparse.ArgumentParser(description="""Generates {train|dev|eval}_wav.scp files.""")
+ parser.add_argument('dir', type=str,
+ default='data',
+ help='path to the directory containing downloaded dataset')
+ args = parser.parse_args()
+
+ assert os.path.isdir(args.dir)
+ with open(os.path.join(args.dir, "train", "text"), 'r', encoding='utf-8') as f:
+ lines = f.readlines()
+ train_set = set([os.path.splitext(os.path.split(line.strip().split()[0])[1])[0] for line in lines])
+ assert len(train_set) > 0
+ with open(os.path.join(args.dir, "dev", "text"), 'r', encoding='utf-8') as f:
+ lines = f.readlines()
+ dev_set = set([os.path.splitext(os.path.split(line.strip().split()[0])[1])[0] for line in lines])
+ assert len(dev_set) > 0
+ with open(os.path.join(args.dir, "eval", "text"), 'r', encoding='utf-8') as f:
+ lines = f.readlines()
+ eval_set = set([os.path.splitext(os.path.split(line.strip().split()[0])[1])[0] for line in lines])
+ assert len(eval_set) > 0
+ assert len(train_set.intersection(dev_set)) == 0
+ assert len(train_set.intersection(eval_set)) == 0
+ assert len(dev_set.intersection(eval_set)) == 0
+
+ train_wav_scp = open(os.path.join(args.dir, "train", "wav.scp"), 'w', encoding='utf-8')
+ dev_wav_scp = open(os.path.join(args.dir, "dev", "wav.scp"), 'w', encoding='utf-8')
+ eval_wav_scp = open(os.path.join(args.dir, "eval", "wav.scp"), 'w', encoding='utf-8')
+
+ # Look through all the subfolders to find audio samples
+ wav_files = {}
+ search_path = os.path.join(args.dir, '**', '*.wav')
+ for wav_path in glob.glob(search_path, recursive=True):
+ _, basename = os.path.split(wav_path)
+ utt_id = os.path.splitext(basename)[0]
+ extended_wav_path = "sox " + os.path.abspath(wav_path) + " -t wav - |"
+ if not utt_id in wav_files:
+ wav_files[utt_id] = extended_wav_path
+ for utt_id in train_set:
+ train_wav_scp.write(utt_id + " " + wav_files[utt_id] + "\n")
+ for utt_id in dev_set:
+ dev_wav_scp.write(utt_id + " " + wav_files[utt_id] + "\n")
+ for utt_id in eval_set:
+ eval_wav_scp.write(utt_id + " " + wav_files[utt_id] + "\n")
+
+ train_wav_scp.close()
+ dev_wav_scp.close()
+ eval_wav_scp.close()
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/mobvoi/v1/local/score_online.sh b/egs/mobvoi/v1/local/score_online.sh
new file mode 100755
index 00000000000..c3a7c60eb46
--- /dev/null
+++ b/egs/mobvoi/v1/local/score_online.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Copyright 2018-2019 Daniel Povey
+# 2018-2020 Yiming Wang
+# Apache 2.0
+
+[ -f ./path.sh ] && . ./path.sh
+
+# begin configuration section.
+wake_word="嗨小问"
+#end configuration section.
+
+echo "$0 $@" # Print the command line for logging
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+ echo "Usage: $0 <data-dir> <lang-dir|graph-dir> <decode-dir>"
+ echo " Options:"
+ exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $data/text; do
+ [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
+done
+
+
+utils/data/get_utt2dur.sh $data
+rm $data/utt2dur_negative 2>/dev/null || true
+utils/filter_scp.pl <(grep -v $wake_word $data/text) $data/utt2dur > $data/utt2dur_negative && dur=`awk '{a+=$2} END{print a}' $data/utt2dur_negative`
+echo "total duration (in seconds) of negative examples in $data: $dur"
+
+ref_filtering_cmd="cat"
+[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter"
+[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter"
+hyp_filtering_cmd="cat"
+[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter"
+[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter"
+
+
+mkdir -p $dir/scoring_kaldi
+cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1;
+cat $dir/trans.txt | utils/int2sym.pl -f 2- $symtab | $hyp_filtering_cmd > $dir/scoring_kaldi/hyp_filt.txt || exit 1;
+export LC_ALL=en_US.UTF-8
+cat $dir/scoring_kaldi/hyp_filt.txt | \
+local/compute_metrics.py $dir/scoring_kaldi/test_filt.txt - --wake-word $wake_word \
+ --duration $dur > $dir/scoring_kaldi/all_results
+export LC_ALL=C
+
+exit 0;
diff --git a/egs/mobvoi/v1/local/split_datasets.sh b/egs/mobvoi/v1/local/split_datasets.sh
new file mode 100755
index 00000000000..a403523622c
--- /dev/null
+++ b/egs/mobvoi/v1/local/split_datasets.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+stage=0
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -eu
+
+
+if [ $stage -le 1 ]; then
+ dir=data/download/ticmini2_dataset_20180607
+ trans=$dir/hixiaowen.txt
+ paste -d ' ' <(cat $trans | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/hixiaowen_text || exit 1
+ dir=data/download/ticmini2_hixiaowen_adult_20180731
+ for folder in patch1 patch2; do
+ trans=$dir/$folder/hixiaowen_trans
+ paste -d ' ' <(cat $trans | awk '{split($1,a,"."); print a[1]}' | awk '{split($1,a,"/"); print a[3]}') <(cat $trans | cut -d ' ' -f2-) || exit 1
+ done > $dir/hixiaowen_text || exit 1
+ dir=data/download/ticmini2_for_school_20180911
+ trans=$dir/hixiaowen/hixiaowen.trans
+ paste -d ' ' <(cat $trans | awk '{split($1,a,"/"); print a[4]}' | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/hixiaowen_text || exit 1
+ for dataset in ticmini2_dataset_20180607 ticmini2_hixiaowen_adult_20180731 ticmini2_for_school_20180911; do
+ cat data/download/$dataset/hixiaowen_text || exit 1
+ done | sort -u -k1,1 > data/hixiaowen_text || exit 1
+fi
+
+if [ $stage -le 2 ]; then
+ dir=data/download/ticmini2_dataset_20180607
+ trans=$dir/freetext.txt
+ paste -d ' ' <(cat $trans | awk '{print $1}' | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/freetext_text || exit 1
+ dir=data/download/ticmini2_for_school_20180911
+ trans=$dir/freetext/freetext.trans
+ paste -d ' ' <(cat $trans | awk '{print $1}' | awk '{split($1,a,"/"); print a[4]}' | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/freetext_text || exit 1
+ for dataset in ticmini2_dataset_20180607 ticmini2_for_school_20180911; do
+ cat data/download/$dataset/freetext_text || exit 1
+ done | sort -u -k1,1 > data/freetext_text || exit 1
+fi
+
+if [ $stage -le 3 ]; then
+ dir=data/download/ticmini2_dataset_20180607
+ trans=$dir/garbage.txt
+ paste -d ' ' <(cat $trans | awk '{print $1}' | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/garbage_text || exit 1
+ cat $dir/garbage_text > data/garbage_text
+fi
+
+if [ $stage -le 4 ]; then
+ cat data/hixiaowen_text data/freetext_text data/garbage_text > data/text
+ cat data/hixiaowen_text data/freetext_text | awk '{print $1}' | awk '{split($1,a,"-"); print $1,a[1]}' > data/hixiaowen_freetext_utt2spk || exit 1
+ cat data/garbage_text | awk '{print $1}' | awk '{split($1,a,"_"); if(a[1]=="garbage") print $1,a[1] "_" a[2] "_" a[3]; else if(a[1]=="ticmini" || a[1]=="timini") print $1,a[1] "_" a[2] "_" a[3] "_" a[4] "_" a[5]; else print $1,$1}' | cat data/hixiaowen_freetext_utt2spk - > data/utt2spk || exit 1
+ rm -f data/hixiaowen_freetext_utt2spk 2>/dev/null || true
+fi
+
+if [ $stage -le 5 ]; then
+ for folder in train dev eval; do
+ mkdir -p data/$folder
+ utils/filter_scp.pl data/download/${folder}_list data/text > data/$folder/text || exit 1
+ utils/filter_scp.pl data/download/${folder}_list data/utt2spk > data/$folder/utt2spk || exit 1
+ done
+fi
+
+exit 0
diff --git a/egs/mobvoi/v1/local/wer_output_filter b/egs/mobvoi/v1/local/wer_output_filter
new file mode 100755
index 00000000000..bb4de1d1572
--- /dev/null
+++ b/egs/mobvoi/v1/local/wer_output_filter
@@ -0,0 +1,24 @@
+#!/usr/bin/env perl
+# Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+use utf8;
+
+use open qw(:encoding(utf8));
+binmode STDIN, ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+while (<>) {
+ @F = split " ";
+ print $F[0] . " ";
+ foreach $s (@F[1..$#F]) {
+ if ($s =~ /\<.*\>/) {
+ print "";
+ } else {
+ print "$s "
+ }
+ }
+ print "\n";
+}
+
+
diff --git a/egs/mobvoi/v1/path.sh b/egs/mobvoi/v1/path.sh
new file mode 100755
index 00000000000..2d17b17a84a
--- /dev/null
+++ b/egs/mobvoi/v1/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/mobvoi/v1/run.sh b/egs/mobvoi/v1/run.sh
new file mode 100755
index 00000000000..994f7217848
--- /dev/null
+++ b/egs/mobvoi/v1/run.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+stage=0
+
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -euo pipefail
+
+if [ $stage -le 0 ]; then
+ local/mobvoi_data_download.sh
+ echo "$0: Extracted all datasets into data/download/"
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Splitting datasets..."
+ local/split_datasets.sh
+ echo "$0: text and utt2spk have been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: Preparing wav.scp..."
+ local/prepare_wav.py data
+ echo "wav.scp has been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Extracting MFCC..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ utils/fix_data_dir.sh $dir
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 16 $dir
+ steps/compute_cmvn_stats.sh $dir
+ utils/fix_data_dir.sh $dir
+ utils/data/get_utt2dur.sh $dir
+ utils/validate_data_dir.sh $dir
+ done
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: Post processing transcripts..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ cat $dir/text | awk '{if ($2=="嗨小问" || $2=="嗨小问嗨小问") {print $1,"嗨小问";} else {print $1,"FREETEXT"}}' > $dir/text.tmp || exit 1
+ mv $dir/text.tmp $dir/text || exit 1
+ done
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: Preparing dictionary and lang..."
+ local/prepare_dict.sh
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.5 \
+ --position-dependent-phones false \
+    data/local/dict "<sil>" data/lang/temp data/lang
+fi
+
+if [ $stage -le 6 ]; then
+  id_sil=`cat data/lang/words.txt | grep "<sil>" | awk '{print $2}'`
+ id_freetext=`cat data/lang/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ export LC_ALL=en_US.UTF-8
+ id_word=`cat data/lang/words.txt | grep "嗨小问" | awk '{print $2}'`
+ export LC_ALL=C
+ mkdir -p data/lang/lm
+  cat <<EOF > data/lang/lm/fst.txt
+0 1 $id_sil $id_sil
+0 4 $id_sil $id_sil 7.0
+1 4 $id_freetext $id_freetext 0.0
+4 0 $id_sil $id_sil
+1 2 $id_word $id_word 1.1
+2 0 $id_sil $id_sil
+0
+EOF
+ fstcompile data/lang/lm/fst.txt data/lang/G.fst
+ set +e
+ fstisstochastic data/lang/G.fst
+ set -e
+ utils/validate_lang.pl data/lang
+fi
+
+if [ $stage -le 7 ]; then
+ echo "$0: subsegmenting for the training data..."
+ srcdir=data/train
+ utils/data/convert_data_dir_to_whole.sh $srcdir ${srcdir}_whole
+
+ utils/data/get_segments_for_data.sh $srcdir > ${srcdir}_whole/segments
+ utils/filter_scp.pl <(awk '{if ($2 == "FREETEXT") print $1}' ${srcdir}_whole/text) \
+ ${srcdir}_whole/segments >${srcdir}_whole/neg_segments
+ utils/filter_scp.pl --exclude ${srcdir}_whole/neg_segments ${srcdir}_whole/segments \
+ >${srcdir}_whole/pos_segments
+ utils/filter_scp.pl ${srcdir}_whole/pos_segments ${srcdir}_whole/utt2dur >${srcdir}_whole/pos_utt2dur
+ local/get_random_subsegments.py --overlap-duration=0.3 --max-remaining-duration=0.3 \
+ ${srcdir}_whole/neg_segments ${srcdir}_whole/pos_utt2dur | \
+ cat ${srcdir}_whole/pos_segments - | sort >${srcdir}_whole/sub_segments
+ utils/data/subsegment_data_dir.sh ${srcdir}_whole \
+ ${srcdir}_whole/sub_segments data/train_segmented
+ awk '{print $1,$2}' ${srcdir}_whole/sub_segments | \
+ utils/apply_map.pl -f 2 ${srcdir}_whole/text >data/train_segmented/text
+ utils/data/extract_wav_segments_data_dir.sh --nj 50 --cmd "$train_cmd" \
+ data/train_segmented data/train_shorter
+ steps/compute_cmvn_stats.sh data/train_shorter
+ utils/fix_data_dir.sh data/train_shorter
+ utils/validate_data_dir.sh data/train_shorter
+fi
+
+# In this section, we augment the training data with reverberation,
+# noise, music, and babble, and combined it with the clean data.
+if [ $stage -le 8 ]; then
+ utils/data/get_utt2dur.sh data/train_shorter
+  cp data/train_shorter/utt2dur data/train_shorter/reco2dur
+ # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+ [ ! -f rirs_noises.zip ] && wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+ [ ! -d "RIRS_NOISES" ] && unzip rirs_noises.zip
+
+ # Make a version with reverberated speech
+ rvb_opts=()
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+
+ # Make a reverberated version of the SWBD+SRE list. Note that we don't add any
+ # additive noise here.
+ steps/data/reverberate_data_dir.py \
+ "${rvb_opts[@]}" \
+ --speech-rvb-probability 1 \
+ --prefix "rev" \
+ --pointsource-noise-addition-probability 0 \
+ --isotropic-noise-addition-probability 0 \
+ --num-replications 1 \
+ --source-sampling-rate 16000 \
+ data/train_shorter data/train_shorter_reverb
+ cat data/train_shorter/utt2dur | awk -v name=rev1 '{print name"-"$0}' >data/train_shorter_reverb/utt2dur
+
+ # Prepare the MUSAN corpus, which consists of music, speech, and noise
+ # suitable for augmentation.
+ steps/data/make_musan.sh /export/corpora/JHU/musan data
+
+ # Get the duration of the MUSAN recordings. This will be used by the
+ # script augment_data_dir.py.
+ for name in speech noise music; do
+ utils/data/get_utt2dur.sh data/musan_${name}
+ cp data/musan_${name}/utt2dur data/musan_${name}/reco2dur
+ done
+
+ # Augment with musan_noise
+ export LC_ALL=en_US.UTF-8
+ steps/data/augment_data_dir.py --utt-prefix "noise" --modify-spk-id true --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_shorter data/train_shorter_noise
+ # Augment with musan_music
+ steps/data/augment_data_dir.py --utt-prefix "music" --modify-spk-id true --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_shorter data/train_shorter_music
+ # Augment with musan_speech
+ steps/data/augment_data_dir.py --utt-prefix "babble" --modify-spk-id true --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_shorter data/train_shorter_babble
+ export LC_ALL=C
+fi
+
+if [ $stage -le 9 ]; then
+ # Now make MFCC features
+ for name in reverb noise music babble; do
+ steps/make_mfcc.sh --nj 16 --cmd "$train_cmd" \
+ data/train_shorter_${name} || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}
+ utils/fix_data_dir.sh data/train_shorter_${name}
+ utils/validate_data_dir.sh data/train_shorter_${name}
+ done
+fi
+
+# monophone training
+if [ $stage -le 10 ]; then
+ steps/train_mono.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter data/lang exp/mono
+ (
+ utils/mkgraph.sh data/lang \
+ exp/mono exp/mono/graph
+ )&
+
+ steps/align_si.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter data/lang exp/mono exp/mono_ali_train_shorter
+fi
+
+if [ $stage -le 11 ]; then
+ echo "$0: preparing for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/train_shorter data/train_shorter_sp
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 data/train_shorter_sp || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_sp || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_sp
+fi
+
+if [ $stage -le 12 ]; then
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter_sp data/lang exp/mono exp/mono_ali_train_shorter_sp || exit 1
+fi
+
+if [ $stage -le 13 ]; then
+ echo "$0: creating high-resolution MFCC features"
+ mfccdir=data/train_shorter_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in train_shorter_sp dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/train_shorter_sp_hires || exit 1;
+
+ for datadir in train_shorter_sp dev eval; do
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+fi
+
+combined_train_set=train_shorter_sp_combined
+aug_affix="reverb noise music babble"
+if [ $stage -le 14 ]; then
+ for name in $aug_affix; do
+ echo "$0: creating high-resolution MFCC features for train_shorter_${name}"
+ mfccdir=data/train_shorter_${name}_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ utils/copy_data_dir.sh data/train_shorter_${name} data/train_shorter_${name}_hires
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_${name}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_${name}_hires || exit 1;
+ done
+ eval utils/combine_data.sh data/${combined_train_set}_hires data/train_shorter_sp_hires \
+ data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}_hires
+fi
+
+
+if [ $stage -le 15 ]; then
+ local/chain/run_tdnn.sh --train-set train_shorter --combined-train-set ${combined_train_set}
+fi
+
+exit 0
+
diff --git a/egs/mobvoi/v1/run_e2e.sh b/egs/mobvoi/v1/run_e2e.sh
new file mode 100755
index 00000000000..7976a3a7593
--- /dev/null
+++ b/egs/mobvoi/v1/run_e2e.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+# This recipe uses E2E LF-MMI training which doesn't require GMM training to obtain alignments.
+# Its performance is slightly better than those based on alignments (cross-entropy or regular LF-MMI)
+# on this dataset.
+
+stage=0
+
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -euo pipefail
+
+if [ $stage -le 0 ]; then
+ local/mobvoi_data_download.sh
+ echo "$0: Extracted all datasets into data/download/"
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Splitting datasets..."
+ local/split_datasets.sh
+ echo "$0: text and utt2spk have been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: Preparing wav.scp..."
+ local/prepare_wav.py data
+ echo "wav.scp has been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Extracting MFCC..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ utils/fix_data_dir.sh $dir
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 16 $dir
+ steps/compute_cmvn_stats.sh $dir
+ utils/fix_data_dir.sh $dir
+ utils/data/get_utt2dur.sh $dir
+ utils/validate_data_dir.sh $dir
+ done
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: Post processing transcripts..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ export LC_ALL=en_US.UTF-8
+ cat $dir/text | awk '{if ($2=="嗨小问" || $2=="嗨小问嗨小问") {print $1,"嗨小问";} else {print $1,"FREETEXT"}}' > $dir/text.tmp || exit 1
+ export LC_ALL=C
+ cat $dir/text.tmp > $dir/text || exit 1
+ rm -f $dir/text.tmp 2>/dev/null || true
+ done
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: Preparing dictionary and lang..."
+ local/prepare_dict.sh
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.5 \
+ --position-dependent-phones false \
+    data/local/dict "<sil>" data/lang/temp data/lang
+fi
+
+if [ $stage -le 6 ]; then
+  id_sil=`cat data/lang/words.txt | grep "<sil>" | awk '{print $2}'`
+ id_freetext=`cat data/lang/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ export LC_ALL=en_US.UTF-8
+ id_word=`cat data/lang/words.txt | grep "嗨小问" | awk '{print $2}'`
+ export LC_ALL=C
+ mkdir -p data/lang/lm
+  cat <<EOF > data/lang/lm/fst.txt
+0 1 $id_sil $id_sil
+0 4 $id_sil $id_sil 7.0
+1 4 $id_freetext $id_freetext 0.0
+4 0 $id_sil $id_sil
+1 2 $id_word $id_word 1.1
+2 0 $id_sil $id_sil
+0
+EOF
+ fstcompile data/lang/lm/fst.txt data/lang/G.fst
+ set +e
+ fstisstochastic data/lang/G.fst
+ set -e
+ utils/validate_lang.pl data/lang
+fi
+
+if [ $stage -le 7 ]; then
+ echo "$0: subsegmenting for the training data..."
+ srcdir=data/train
+ utils/data/convert_data_dir_to_whole.sh $srcdir ${srcdir}_whole
+
+ utils/data/get_segments_for_data.sh $srcdir > ${srcdir}_whole/segments
+ utils/filter_scp.pl <(awk '{if ($2 == "FREETEXT") print $1}' ${srcdir}_whole/text) \
+ ${srcdir}_whole/segments >${srcdir}_whole/neg_segments
+ utils/filter_scp.pl --exclude ${srcdir}_whole/neg_segments ${srcdir}_whole/segments \
+ >${srcdir}_whole/pos_segments
+ utils/filter_scp.pl ${srcdir}_whole/pos_segments ${srcdir}_whole/utt2dur >${srcdir}_whole/pos_utt2dur
+ local/get_random_subsegments.py --overlap-duration=0.3 --max-remaining-duration=0.3 \
+ ${srcdir}_whole/neg_segments ${srcdir}_whole/pos_utt2dur | \
+ cat ${srcdir}_whole/pos_segments - | sort >${srcdir}_whole/sub_segments
+ utils/data/subsegment_data_dir.sh ${srcdir}_whole \
+ ${srcdir}_whole/sub_segments data/train_segmented
+ awk '{print $1,$2}' ${srcdir}_whole/sub_segments | \
+ utils/apply_map.pl -f 2 ${srcdir}_whole/text >data/train_segmented/text
+ utils/data/extract_wav_segments_data_dir.sh --nj 50 --cmd "$train_cmd" \
+ data/train_segmented data/train_shorter
+ steps/compute_cmvn_stats.sh data/train_shorter
+ utils/fix_data_dir.sh data/train_shorter
+ utils/validate_data_dir.sh data/train_shorter
+fi
+
+# In this section, we augment the training data with reverberation,
+# noise, music, and babble, and combined it with the clean data.
+if [ $stage -le 8 ]; then
+ utils/data/get_utt2dur.sh data/train_shorter
+ cp data/train_shorter/utt2dur data/train_shorter/reco2dur
+ # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+ [ ! -f rirs_noises.zip ] && wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+ [ ! -d "RIRS_NOISES" ] && unzip rirs_noises.zip
+
+ # Make a version with reverberated speech
+ rvb_opts=()
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+
+ # Make a reverberated version of the SWBD+SRE list. Note that we don't add any
+ # additive noise here.
+ steps/data/reverberate_data_dir.py \
+ "${rvb_opts[@]}" \
+ --speech-rvb-probability 1 \
+ --prefix "rev" \
+ --pointsource-noise-addition-probability 0 \
+ --isotropic-noise-addition-probability 0 \
+ --num-replications 1 \
+ --source-sampling-rate 16000 \
+ data/train_shorter data/train_shorter_reverb
+ cat data/train_shorter/utt2dur | awk -v name=rev1 '{print name"-"$0}' >data/train_shorter_reverb/utt2dur
+
+ # Prepare the MUSAN corpus, which consists of music, speech, and noise
+ # suitable for augmentation.
+ steps/data/make_musan.sh /export/corpora/JHU/musan data
+
+ # Get the duration of the MUSAN recordings. This will be used by the
+ # script augment_data_dir.py.
+ for name in speech noise music; do
+ utils/data/get_utt2dur.sh data/musan_${name}
+ cp data/musan_${name}/utt2dur data/musan_${name}/reco2dur
+ done
+
+ # Augment with musan_noise
+ export LC_ALL=en_US.UTF-8
+ steps/data/augment_data_dir.py --utt-prefix "noise" --modify-spk-id true --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_shorter data/train_shorter_noise
+ # Augment with musan_music
+ steps/data/augment_data_dir.py --utt-prefix "music" --modify-spk-id true --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_shorter data/train_shorter_music
+ # Augment with musan_speech
+ steps/data/augment_data_dir.py --utt-prefix "babble" --modify-spk-id true --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_shorter data/train_shorter_babble
+ export LC_ALL=C
+fi
+
+if [ $stage -le 9 ]; then
+ # Now make MFCC features
+ for name in reverb noise music babble; do
+ steps/make_mfcc.sh --nj 16 --cmd "$train_cmd" \
+ data/train_shorter_${name} || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}
+ utils/fix_data_dir.sh data/train_shorter_${name}
+ utils/validate_data_dir.sh data/train_shorter_${name}
+ done
+fi
+
+combined_train_set=train_shorter_combined
+aug_affix="reverb noise music babble"
+if [ $stage -le 10 ]; then
+ aug_affix="reverb noise music babble"
+ eval utils/combine_data.sh data/${combined_train_set} data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}
+fi
+
+if [ -f data/${combined_train_set}_spe2e_hires/feats.scp ]; then
+ echo "$0: It seems that features for the perturbed training data already exist."
+ echo "If you want to extract them anyway, remove them first and run this"
+ echo "stage again. Skipping this stage..."
+else
+ if [ $stage -le 11 ]; then
+ echo "$0: perturbing the training data to allowed lengths..."
+ utils/data/get_utt2dur.sh data/${combined_train_set} # necessary for the next command
+
+ # 12 in the following command means the allowed lengths are spaced
+ # by 12% change in length.
+ utils/data/perturb_speed_to_allowed_lengths.py --speed-perturb false 12 data/${combined_train_set} \
+ data/${combined_train_set}_e2e_hires
+ cat data/${combined_train_set}_e2e_hires/utt2dur | \
+ awk '{print $1 " " substr($1,5)}' >data/${combined_train_set}_e2e_hires/utt2uniq.tmp
+ utils/apply_map.pl -f 2 data/${combined_train_set}/utt2uniq \
+ data/${combined_train_set}_e2e_hires/utt2uniq
+ rm -f data/${combined_train_set}_e2e_hires/utt2uniq.tmp 2>/dev/null || true
+ utils/fix_data_dir.sh data/${combined_train_set}_e2e_hires
+
+ utils/data/get_utt2dur.sh data/train_shorter # necessary for the next command
+ utils/data/perturb_speed_to_allowed_lengths.py 12 data/train_shorter data/train_shorter_spe2e_hires
+ cat data/train_shorter_spe2e_hires/utt2dur | \
+ awk '{print $1 " " substr($1,5)}' >data/train_shorter_spe2e_hires/utt2uniq
+ utils/fix_data_dir.sh data/train_shorter_spe2e_hires
+ utils/combine_data.sh data/${combined_train_set}_spe2e_hires data/${combined_train_set}_e2e_hires data/train_shorter_spe2e_hires
+ cat data/train_shorter_spe2e_hires/allowed_lengths.txt >data/${combined_train_set}_spe2e_hires/allowed_lengths.txt
+ fi
+
+ if [ $stage -le 12 ]; then
+ echo "$0: extracting MFCC features for the training data..."
+ mfccdir=data/${combined_train_set}_spe2e_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" \
+ data/${combined_train_set}_spe2e_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${combined_train_set}_spe2e_hires || exit 1;
+ utils/fix_data_dir.sh data/${combined_train_set}_spe2e_hires
+ utils/validate_data_dir.sh data/${combined_train_set}_spe2e_hires
+ fi
+fi
+
+if [ $stage -le 13 ]; then
+ if [ -f data/eval_hires/feats.scp ]; then
+ echo "$0: It seems that features for the test sets already exist."
+ echo "skipping this stage..."
+ else
+ echo "$0: extracting MFCC features for the test sets"
+ for datadir in dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+ fi
+fi
+
+if [ $stage -le 14 ]; then
+ local/chain/run_e2e_tdnn.sh --train-set ${combined_train_set}_spe2e
+fi
+
+combined_train_set=train_shorter_sp_combined
+if [ -f data/${combined_train_set}_hires/feats.scp ]; then
+ echo "$0: It seems that features for the perturbed training data already exist."
+ echo "If you want to extract them anyway, remove them first and run this"
+ echo "stage again. Skipping this stage..."
+else
+ if [ $stage -le 15 ]; then
+ echo "$0: preparing for speed-perturbed data"
+ utils/data/perturb_data_dir_speed_3way.sh data/train_shorter data/train_shorter_sp_hires
+ echo "$0: creating high-resolution MFCC features for speed-perturbed data"
+ mfccdir=data/train_shorter_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/train_shorter_sp_hires || exit 1;
+
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_sp_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_sp_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_sp_hires || exit 1;
+ fi
+
+ if [ $stage -le 16 ]; then
+ for name in $aug_affix; do
+ echo "$0: creating high-resolution MFCC features for train_shorter_${name}"
+ mfccdir=data/train_shorter_${name}_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ utils/copy_data_dir.sh data/train_shorter_${name} data/train_shorter_${name}_hires
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_${name}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_${name}_hires || exit 1;
+ done
+ eval utils/combine_data.sh data/${combined_train_set}_hires data/train_shorter_sp_hires \
+ data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}_hires
+ fi
+fi
+
+if [ $stage -le 17 ]; then
+ echo "$0: Aligning the training data using the e2e chain model..."
+ steps/nnet3/align.sh --nj 50 --cmd "$train_cmd" \
+ --use-gpu false \
+ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \
+ data/${combined_train_set}_hires data/lang exp/chain/e2e_tdnn_1a exp/chain/e2e_ali_${combined_train_set}
+fi
+
+if [ $stage -le 18 ]; then
+ echo "$0: Building a tree and training a regular chain model using the e2e alignments..."
+ local/chain/run_tdnn_e2eali.sh --train-set ${combined_train_set} --e2echain-model-dir exp/chain/e2e_tdnn_1a
+fi
+
+exit 0
diff --git a/egs/mobvoi/v1/steps b/egs/mobvoi/v1/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/mobvoi/v1/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/mobvoi/v1/utils b/egs/mobvoi/v1/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/mobvoi/v1/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/mobvoihotwords/README.txt b/egs/mobvoihotwords/README.txt
new file mode 100644
index 00000000000..4c43c3f8eb5
--- /dev/null
+++ b/egs/mobvoihotwords/README.txt
@@ -0,0 +1,17 @@
+
+ The MobvoiHotwords dataset is a ~144-hour corpus of wake word data which is
+ publicly available on https://www.openslr.org/87
+
+ For wake word data, wake word utterances containing either 'Hi xiaowen' or 'Nihao
+ Wenwen' are collected. For each wake word, there are about 36k utterances. All
+ wake word data is collected from 788 subjects, ages 3-65, with different
+ distances from the smart speaker (1, 3 and 5 meters). Different noises
+ (typical home environment noises like music and TV) with varying sound
+ pressure levels are played in the background during the collection.
+
+ The recipe is in v1/
+
+ The E2E LF-MMI recipe does not require any prior alignments for training
+ LF-MMI, making the alignment more flexible during training. It can be optionally
+ followed by a regular LF-MMI training to further improve the performance.
+
diff --git a/egs/mobvoihotwords/v1/cmd.sh b/egs/mobvoihotwords/v1/cmd.sh
new file mode 100644
index 00000000000..fc5d4aa9e1c
--- /dev/null
+++ b/egs/mobvoihotwords/v1/cmd.sh
@@ -0,0 +1,24 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl"
+export decode_cmd="queue.pl --mem 4G"
+# the use of cuda_cmd is deprecated, used only in 'nnet1',
+export cuda_cmd="queue.pl --gpu 1"
+
+if [[ "$(hostname -f)" == *.fit.vutbr.cz ]]; then
+ queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+ export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+ export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+ export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
+
diff --git a/egs/mobvoihotwords/v1/conf/mfcc.conf b/egs/mobvoihotwords/v1/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/mobvoihotwords/v1/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/mobvoihotwords/v1/conf/mfcc_hires.conf b/egs/mobvoihotwords/v1/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..d96b86ddfcb
--- /dev/null
+++ b/egs/mobvoihotwords/v1/conf/mfcc_hires.conf
@@ -0,0 +1,9 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequently, relative to Nyquist of 8000 (=7600)
diff --git a/egs/mobvoihotwords/v1/conf/online_cmvn.conf b/egs/mobvoihotwords/v1/conf/online_cmvn.conf
new file mode 100644
index 00000000000..a173510e433
--- /dev/null
+++ b/egs/mobvoihotwords/v1/conf/online_cmvn.conf
@@ -0,0 +1,3 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
+--norm-means=true
+--norm-vars=false
diff --git a/egs/mobvoihotwords/v1/local/add_prefix_to_scp.py b/egs/mobvoihotwords/v1/local/add_prefix_to_scp.py
new file mode 120000
index 00000000000..b6750c78e16
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/add_prefix_to_scp.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/add_prefix_to_scp.py
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/build_tree.sh b/egs/mobvoihotwords/v1/local/chain/build_tree.sh
new file mode 120000
index 00000000000..fb4d74cc9ae
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/build_tree.sh
@@ -0,0 +1 @@
+../../../../mobvoi/v1/local/chain/build_tree.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/run_e2e_tdnn.sh b/egs/mobvoihotwords/v1/local/chain/run_e2e_tdnn.sh
new file mode 120000
index 00000000000..891eec02423
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/run_e2e_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_e2e_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/run_tdnn.sh b/egs/mobvoihotwords/v1/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/run_tdnn_e2eali.sh b/egs/mobvoihotwords/v1/local/chain/run_tdnn_e2eali.sh
new file mode 120000
index 00000000000..38f0bd07e6c
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/run_tdnn_e2eali.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_e2eali_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/tuning/run_e2e_tdnn_1a.sh b/egs/mobvoihotwords/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
new file mode 100755
index 00000000000..a0df6b0ce14
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
@@ -0,0 +1,258 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+set -e
+
+# configs for 'chain'
+stage=0
+train_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+minibatch_size=150=128,64/300=100,64,32/600=50,32,16/1200=16,8
+common_egs_dir=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_combined_spe2e
+test_sets="dev eval"
+wake_word0="HiXiaowen"
+wake_word1="NihaoWenwen"
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ mkdir -p $tree_dir
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word0=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/phones.txt | grep "nihaowenwen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word0 $id_word0
+2 3 $id_sil $id_sil
+1 4 $id_word1 $id_word1
+4 5 $id_sil $id_sil
+1 6 $id_freetext $id_freetext
+6 7 $id_sil $id_sil
+3 2.3
+5 2.3
+7 0.0
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+ steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$train_cmd" \
+ data/${train_set}_hires $lang $tree_dir
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+
+ relu-batchnorm-dropout-layer name=tdnn1 input=Append(-2,-1,0,1,2) $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 3 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \
+ --cmd "$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.frame-subsampling-factor=3 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00003 \
+ --trainer.optimization.final-effective-lrate 0.000003 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch $minibatch_size \
+ --trainer.optimization.momentum=0.0 \
+ --egs.dir "$common_egs_dir" \
+ --egs.opts "--num-utts-subset 300 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --feat-dir data/${train_set}_hires \
+ --tree-dir $tree_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word in $wake_word0 $wake_word1; do
+ if [[ "$wake_word" == "$wake_word0" ]]; then
+ wake_word0_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ wake_word1_cost_range="0.0"
+ else
+ wake_word0_cost_range="0.0"
+ wake_word1_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ fi
+ for wake_word0_cost in $wake_word0_cost_range; do
+ for wake_word1_cost in $wake_word1_cost_range; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id0=`cat $lang_decode/words.txt | grep $wake_word0 | awk '{print $2}'`
+ id1=`cat $lang_decode/words.txt | grep $wake_word1 | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.0
+4 0 $sil_id $sil_id
+1 2 $id0 $id0 $wake_word0_cost
+1 3 $id1 $id1 $wake_word1_cost
+2 0 $sil_id $sil_id
+3 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 10 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_${wake_word}_cost${wake_word0_cost}_${wake_word1_cost} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ done
+ done
+ for data in $test_sets; do
+ for wake_word in $wake_word0 $wake_word1; do
+ echo "Results on $data set with wake word ${wake_word}:"
+ cat ${dir}_online/decode_${data}_${wake_word}_cost*/scoring_kaldi/all_results
+ done
+ done
+fi
diff --git a/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_1a.sh b/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..edb1a8524db
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,312 @@
+#!/bin/bash
+#
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+# Apache 2.0
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+nj=30
+gmm=mono
+train_stage=-5 # starting from -5 to skip phone-lm estimation
+get_egs_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+chunk_width=140,100,160
+common_egs_dir=
+reporting_email=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter
+combined_train_set=train_shorter_sp_combined
+test_sets="dev eval"
+aug_prefix="rev1 noise music babble"
+wake_word0="HiXiaowen"
+wake_word1="NihaoWenwen"
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 2 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom)
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 3 ]; then
+ local/copy_lat_dir.sh --nj 75 --cmd "$train_cmd" --utt-prefixes "$aug_prefix" \
+ $combined_train_data_dir $lat_dir $combined_lat_dir
+fi
+
+if [ $stage -le 4 ]; then
+ # Build a tree using our new topology. We know we have alignments from
+ # steps/align_fmllr.sh, so use those.
+ # The num-leaves is always somewhat less than the num-leaves from the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ local/chain/build_tree.sh \
+ --frame-subsampling-factor 3 \
+ --cmd "$train_cmd" ${lores_train_data_dir} \
+ $lang $ali_dir $tree_dir
+
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word0=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/phones.txt | grep "nihaowenwen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word0 $id_word0
+2 3 $id_sil $id_sil
+1 4 $id_word1 $id_word1
+4 5 $id_sil $id_sil
+1 6 $id_freetext $id_freetext
+6 7 $id_sil $id_sil
+3 2.3
+5 2.3
+7 0.0
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 6 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ cp $tree_dir/phone_lm.fst $dir/phone_lm.fst
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00005 \
+ --trainer.optimization.final-effective-lrate 0.000005 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.0 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=0 \
+ --egs.chunk-right-context=0 \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir $combined_train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$combined_lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word in $wake_word0 $wake_word1; do
+ if [[ "$wake_word" == "$wake_word0" ]]; then
+ wake_word0_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ wake_word1_cost_range="0.0"
+ else
+ wake_word0_cost_range="0.0"
+ wake_word1_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ fi
+ for wake_word0_cost in $wake_word0_cost_range; do
+ for wake_word1_cost in $wake_word1_cost_range; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id0=`cat $lang_decode/words.txt | grep $wake_word0 | awk '{print $2}'`
+ id1=`cat $lang_decode/words.txt | grep $wake_word1 | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.0
+4 0 $sil_id $sil_id
+1 2 $id0 $id0 $wake_word0_cost
+1 3 $id1 $id1 $wake_word1_cost
+2 0 $sil_id $sil_id
+3 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 10 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_${wake_word}_cost${wake_word0_cost}_${wake_word1_cost} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ done
+ done
+ for data in $test_sets; do
+ for wake_word in $wake_word0 $wake_word1; do
+ echo "Results on $data set with wake word ${wake_word}:"
+ cat ${dir}_online/decode_${data}_${wake_word}_cost*/scoring_kaldi/all_results
+ done
+ done
+fi
diff --git a/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh b/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
new file mode 100755
index 00000000000..12c8448f65e
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+#
+# Copyright 2019-2020 Daniel Povey
+# 2019-2020 Yiming Wang
+# Apache 2.0
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+nj=30
+e2echain_model_dir=exp/chain/e2e_tdnn_1a
+train_stage=-5 # starting from -5 to skip phone-lm estimation
+get_egs_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+chunk_width=140,100,160
+common_egs_dir=
+reporting_email=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_sp_combined
+test_sets="dev eval"
+wake_word0="HiXiaowen"
+wake_word1="NihaoWenwen"
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 2 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom)
+ # use the same num-jobs as the alignments
+ steps/nnet3/align_lats.sh --nj 75 --cmd "$train_cmd" \
+ --acoustic-scale 1.0 \
+ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \
+ $train_data_dir data/lang $e2echain_model_dir $lat_dir
+ echo "" >$lat_dir/splice_opts
+fi
+
+if [ $stage -le 3 ]; then
+ # Build a tree using our new topology. We know we have alignments from
+ # steps/align_fmllr.sh, so use those.
+ # The num-leaves is always somewhat less than the num-leaves from the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ local/chain/build_tree.sh \
+ --frame-subsampling-factor 3 --cmd "$train_cmd" \
+ $train_data_dir $lang $ali_dir $tree_dir
+
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word0=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/phones.txt | grep "nihaowenwen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word0 $id_word0
+2 3 $id_sil $id_sil
+1 4 $id_word1 $id_word1
+4 5 $id_sil $id_sil
+1 6 $id_freetext $id_freetext
+6 7 $id_sil $id_sil
+3 2.3
+5 2.3
+7 0.0
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 6 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ cp $tree_dir/phone_lm.fst $dir/phone_lm.fst
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.alignment-subsampling-factor=1 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00005 \
+ --trainer.optimization.final-effective-lrate 0.000005 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.0 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=0 \
+ --egs.chunk-right-context=0 \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir $train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word in $wake_word0 $wake_word1; do
+ if [[ "$wake_word" == "$wake_word0" ]]; then
+ wake_word0_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ wake_word1_cost_range="0.0"
+ else
+ wake_word0_cost_range="0.0"
+ wake_word1_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ fi
+ for wake_word0_cost in $wake_word0_cost_range; do
+ for wake_word1_cost in $wake_word1_cost_range; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id0=`cat $lang_decode/words.txt | grep $wake_word0 | awk '{print $2}'`
+ id1=`cat $lang_decode/words.txt | grep $wake_word1 | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.0
+4 0 $sil_id $sil_id
+1 2 $id0 $id0 $wake_word0_cost
+1 3 $id1 $id1 $wake_word1_cost
+2 0 $sil_id $sil_id
+3 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 10 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_${wake_word}_cost${wake_word0_cost}_${wake_word1_cost} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ done
+ done
+ for data in $test_sets; do
+ for wake_word in $wake_word0 $wake_word1; do
+ echo "Results on $data set with wake word ${wake_word}:"
+ cat ${dir}_online/decode_${data}_${wake_word}_cost*/scoring_kaldi/all_results
+ done
+ done
+fi
diff --git a/egs/mobvoihotwords/v1/local/compute_metrics.py b/egs/mobvoihotwords/v1/local/compute_metrics.py
new file mode 120000
index 00000000000..695a2ca5f6d
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/compute_metrics.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/compute_metrics.py
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/copy_lat_dir.sh b/egs/mobvoihotwords/v1/local/copy_lat_dir.sh
new file mode 120000
index 00000000000..6be684730ad
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/copy_lat_dir.sh
@@ -0,0 +1 @@
+../../../../scripts/wakeword/copy_lat_dir.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/gen_topo.pl b/egs/mobvoihotwords/v1/local/gen_topo.pl
new file mode 120000
index 00000000000..fd5959cebaf
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/gen_topo.pl
@@ -0,0 +1 @@
+../../../../scripts/wakeword/gen_topo.pl
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/get_random_subsegments.py b/egs/mobvoihotwords/v1/local/get_random_subsegments.py
new file mode 120000
index 00000000000..24631471ff6
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/get_random_subsegments.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/get_random_subsegments.py
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/mobvoi_data_download.sh b/egs/mobvoihotwords/v1/local/mobvoi_data_download.sh
new file mode 100755
index 00000000000..9857c97ee80
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/mobvoi_data_download.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+[ -f ./path.sh ] && . ./path.sh
+
+dl_dir=data/download
+
+mkdir -p $dl_dir
+
+dataset=mobvoi_hotword_dataset.tgz
+resources=mobvoi_hotword_dataset_resources.tgz
+
+# base url for downloads.
+data_url=http://www.openslr.org/resources/87
+
+if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
+ src_path=/export/fs04/a07/ywang/mobvoihotwords
+else
+ src_path=$dl_dir
+fi
+
+if [ ! -f $src_path/$dataset ] || [ ! -f $src_path/$resources ]; then
+ if ! which wget >/dev/null; then
+ echo "$0: wget is not installed."
+ exit 1;
+ fi
+
+ if [ ! -f $src_path/$dataset ]; then
+ echo "$0: downloading data from $data_url/$dataset. This may take some time, please be patient."
+ if ! wget --no-check-certificate -O $dl_dir/$dataset $data_url/$dataset; then
+ echo "$0: error executing wget $data_url/$dataset"
+ exit 1;
+ fi
+ fi
+
+ if [ ! -f $src_path/$resources ]; then
+ if ! wget --no-check-certificate -O $dl_dir/$resources $data_url/$resources; then
+ echo "$0: error executing wget $data_url/$resources"
+ exit 1;
+ fi
+ fi
+fi
+
+if [ -d $dl_dir/$(basename "$dataset" .tgz) ]; then
+ echo "Not extracting $(basename "$dataset" .tgz) as it is already there."
+else
+ echo "Extracting $dataset..."
+ tar -xvzf $src_path/$dataset -C $dl_dir || exit 1;
+ echo "Done extracting $dataset."
+fi
+
+if [ -d $dl_dir/$(basename "$resources" .tgz) ]; then
+ echo "Not extracting $(basename "$resources" .tgz) as it is already there."
+else
+ echo "Extracting $resources..."
+ tar -xvzf $src_path/$resources -C $dl_dir || exit 1;
+ echo "Done extracting $resources."
+fi
+
+exit 0
diff --git a/egs/mobvoihotwords/v1/local/prepare_data.py b/egs/mobvoihotwords/v1/local/prepare_data.py
new file mode 100755
index 00000000000..3e11d313491
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/prepare_data.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+""" This script prepares the Mobvoi data into kaldi format.
+"""
+
+
+import argparse
+import os
+import sys
+import json
+
+def main():
+ parser = argparse.ArgumentParser(description="""Prepare data.""")
+ parser.add_argument("wav_dir", type=str,
+ help="dir containing all the wav files")
+ parser.add_argument("path", type=str,
+ help="path to the json file")
+ parser.add_argument("out_dir", type=str,
+ help="out dir")
+ parser.add_argument("--non-wake-word", type=str, default="FREETEXT",
+ help="non-wake word transcript")
+ args = parser.parse_args()
+
+ assert args.non_wake_word != "HiXiaowen" and args.non_wake_word != "NihaoWenwen"
+ with open(args.path, "r", encoding="utf-8") as f:
+ data = json.load(f)
+ utt_id, spk_id, wav_file, label = [], [], [], []
+ for entry in data:
+ utt_id.append(entry["utt_id"])
+ spk_id.append(entry["speaker_id"])
+ label.append(entry["keyword_id"])
+
+ abs_dir = os.path.abspath(args.wav_dir)
+ with open(os.path.join(args.out_dir, "wav.scp"), "w", encoding="utf-8") as f_wav, \
+ open(os.path.join(args.out_dir, "text"), "w", encoding="utf-8") as f_text, \
+ open(os.path.join(args.out_dir, 'utt2spk'), 'w', encoding="utf-8") as f_utt2spk:
+ for utt, spk, l in zip(utt_id, spk_id, label):
+ if spk is None:
+ spk = utt # deal with None speaker
+ f_wav.write(spk + "-" + utt + " " + os.path.join(abs_dir, utt + ".wav") + "\n")
+ if l == 0:
+ text = "HiXiaowen"
+ elif l == 1:
+ text = "NihaoWenwen"
+ else:
+ assert l == -1
+ text = args.non_wake_word
+ f_text.write(spk + "-" + utt + " " + text + "\n")
+ f_utt2spk.write(spk + "-" + utt + " " + spk + "\n")
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/mobvoihotwords/v1/local/prepare_dict.sh b/egs/mobvoihotwords/v1/local/prepare_dict.sh
new file mode 100755
index 00000000000..afe17d57d3a
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/prepare_dict.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+
+set -e
+dir=data/local/dict
+
+. ./utils/parse_options.sh
+
+mkdir -p $dir
+
+# First get the set of all letters that occur in data/train/text
+echo "hixiaowen" > $dir/nonsilence_phones.txt
+echo "nihaowenwen" >> $dir/nonsilence_phones.txt
+echo "freetext" >> $dir/nonsilence_phones.txt
+
+echo "HiXiaowen hixiaowen" > $dir/lexicon.txt
+echo "NihaoWenwen nihaowenwen" >> $dir/lexicon.txt
+echo "FREETEXT freetext" >> $dir/lexicon.txt
+echo "<sil> SIL" >> $dir/lexicon.txt
+
+echo SIL > $dir/silence_phones.txt
+
+echo SIL >$dir/optional_silence.txt
+
+echo -n "" >$dir/extra_questions.txt
diff --git a/egs/mobvoihotwords/v1/local/score_online.sh b/egs/mobvoihotwords/v1/local/score_online.sh
new file mode 120000
index 00000000000..c2b12f23b08
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/score_online.sh
@@ -0,0 +1 @@
+../../../mobvoi/v1/local/score_online.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/wer_output_filter b/egs/mobvoihotwords/v1/local/wer_output_filter
new file mode 100755
index 00000000000..bb4de1d1572
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/wer_output_filter
@@ -0,0 +1,24 @@
+#!/usr/bin/env perl
+# Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+use utf8;
+
+use open qw(:encoding(utf8));
+binmode STDIN, ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+while (<>) {
+ @F = split " ";
+ print $F[0] . " ";
+ foreach $s (@F[1..$#F]) {
+ if ($s =~ /\<.*\>/) {
+ print "";
+ } else {
+ print "$s "
+ }
+ }
+ print "\n";
+}
+
+
diff --git a/egs/mobvoihotwords/v1/path.sh b/egs/mobvoihotwords/v1/path.sh
new file mode 100755
index 00000000000..2d17b17a84a
--- /dev/null
+++ b/egs/mobvoihotwords/v1/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/mobvoihotwords/v1/run.sh b/egs/mobvoihotwords/v1/run.sh
new file mode 100755
index 00000000000..6a5adc14527
--- /dev/null
+++ b/egs/mobvoihotwords/v1/run.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+stage=0
+
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -euo pipefail
+
+if [ $stage -le 0 ]; then
+ local/mobvoi_data_download.sh
+ echo "$0: Extracted all datasets into data/download/"
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Preparing datasets..."
+ wav_dir=data/download/mobvoi_hotword_dataset
+ for folder in train dev eval; do
+ mkdir -p data/$folder
+ for prefix in p n; do
+ mkdir -p data/${prefix}_$folder
+ json_path=data/download/mobvoi_hotword_dataset_resources/${prefix}_$folder.json
+ if [ $folder = "eval" ]; then
+ json_path=data/download/mobvoi_hotword_dataset_resources/${prefix}_test.json
+ fi
+ local/prepare_data.py $wav_dir $json_path data/${prefix}_$folder --non-wake-word "FREETEXT"
+ done
+ cat data/p_$folder/wav.scp data/n_$folder/wav.scp > data/$folder/wav.scp
+ cat data/p_$folder/text data/n_$folder/text > data/$folder/text
+ cat data/p_$folder/utt2spk data/n_$folder/utt2spk > data/$folder/utt2spk
+ rm -rf data/p_$folder data/n_$folder
+ done
+ echo "$0: text, utt2spk and wav.scp have been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: Extracting MFCC..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ utils/fix_data_dir.sh $dir
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 16 $dir
+ steps/compute_cmvn_stats.sh $dir
+ utils/fix_data_dir.sh $dir
+ utils/data/get_utt2dur.sh $dir
+ utils/validate_data_dir.sh $dir
+ done
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Preparing dictionary and lang..."
+ local/prepare_dict.sh
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.5 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" data/lang/temp data/lang
+fi
+
+if [ $stage -le 4 ]; then
+ id_sil=`cat data/lang/words.txt | grep "<sil>" | awk '{print $2}'`
+ id_freetext=`cat data/lang/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id_word0=`cat data/lang/words.txt | grep "HiXiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/words.txt | grep "NihaoWenwen" | awk '{print $2}'`
+ mkdir -p data/lang/lm
+ cat <<EOF > data/lang/lm/fst.txt
+0 1 $id_sil $id_sil
+0 4 $id_sil $id_sil 7.0
+1 4 $id_freetext $id_freetext 0.0
+4 0 $id_sil $id_sil
+1 2 $id_word0 $id_word0 2.3
+2 0 $id_sil $id_sil
+1 3 $id_word1 $id_word1 2.3
+3 0 $id_sil $id_sil
+0
+EOF
+ fstcompile data/lang/lm/fst.txt data/lang/G.fst
+ set +e
+ fstisstochastic data/lang/G.fst
+ set -e
+ utils/validate_lang.pl data/lang
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: subsegmenting for the training data..."
+ srcdir=data/train
+ utils/data/convert_data_dir_to_whole.sh $srcdir ${srcdir}_whole
+
+ utils/data/get_segments_for_data.sh $srcdir > ${srcdir}_whole/segments
+ utils/filter_scp.pl <(awk '{if ($2 == "FREETEXT") print $1}' ${srcdir}_whole/text) \
+ ${srcdir}_whole/segments >${srcdir}_whole/neg_segments
+ utils/filter_scp.pl --exclude ${srcdir}_whole/neg_segments ${srcdir}_whole/segments \
+ >${srcdir}_whole/pos_segments
+ utils/filter_scp.pl ${srcdir}_whole/pos_segments ${srcdir}_whole/utt2dur >${srcdir}_whole/pos_utt2dur
+ local/get_random_subsegments.py --overlap-duration=0.3 --max-remaining-duration=0.3 \
+ ${srcdir}_whole/neg_segments ${srcdir}_whole/pos_utt2dur | \
+ cat ${srcdir}_whole/pos_segments - | sort >${srcdir}_whole/sub_segments
+ utils/data/subsegment_data_dir.sh ${srcdir}_whole \
+ ${srcdir}_whole/sub_segments data/train_segmented
+ awk '{print $1,$2}' ${srcdir}_whole/sub_segments | \
+ utils/apply_map.pl -f 2 ${srcdir}_whole/text >data/train_segmented/text
+ utils/data/extract_wav_segments_data_dir.sh --nj 50 --cmd "$train_cmd" \
+ data/train_segmented data/train_shorter
+ steps/compute_cmvn_stats.sh data/train_shorter
+ utils/fix_data_dir.sh data/train_shorter
+ utils/validate_data_dir.sh data/train_shorter
+fi
+
+# In this section, we augment the training data with reverberation,
+# noise, music, and babble, and combined it with the clean data.
+if [ $stage -le 6 ]; then
+ utils/data/get_utt2dur.sh data/train_shorter
+ cp data/train_shorter/utt2dur data/train_shorter/reco2dur
+ # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+ [ ! -f rirs_noises.zip ] && wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+ [ ! -d "RIRS_NOISES" ] && unzip rirs_noises.zip
+
+ # Make a version with reverberated speech
+ rvb_opts=()
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+
+ # Make a reverberated version of the SWBD+SRE list. Note that we don't add any
+ # additive noise here.
+ steps/data/reverberate_data_dir.py \
+ "${rvb_opts[@]}" \
+ --speech-rvb-probability 1 \
+ --prefix "rev" \
+ --pointsource-noise-addition-probability 0 \
+ --isotropic-noise-addition-probability 0 \
+ --num-replications 1 \
+ --source-sampling-rate 16000 \
+ data/train_shorter data/train_shorter_reverb
+ cat data/train_shorter/utt2dur | awk -v name=rev1 '{print name"-"$0}' >data/train_shorter_reverb/utt2dur
+
+ # Prepare the MUSAN corpus, which consists of music, speech, and noise
+ # suitable for augmentation.
+ steps/data/make_musan.sh /export/corpora/JHU/musan data
+
+ # Get the duration of the MUSAN recordings. This will be used by the
+ # script augment_data_dir.py.
+ for name in speech noise music; do
+ utils/data/get_utt2dur.sh data/musan_${name}
+ cp data/musan_${name}/utt2dur data/musan_${name}/reco2dur
+ done
+
+ # Augment with musan_noise
+ export LC_ALL=en_US.UTF-8
+ steps/data/augment_data_dir.py --utt-prefix "noise" --modify-spk-id true --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_shorter data/train_shorter_noise
+ # Augment with musan_music
+ steps/data/augment_data_dir.py --utt-prefix "music" --modify-spk-id true --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_shorter data/train_shorter_music
+ # Augment with musan_speech
+ steps/data/augment_data_dir.py --utt-prefix "babble" --modify-spk-id true --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_shorter data/train_shorter_babble
+ export LC_ALL=C
+fi
+
+if [ $stage -le 7 ]; then
+ # Now make MFCC features
+ for name in reverb noise music babble; do
+ steps/make_mfcc.sh --nj 16 --cmd "$train_cmd" \
+ data/train_shorter_${name} || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}
+ utils/fix_data_dir.sh data/train_shorter_${name}
+ utils/validate_data_dir.sh data/train_shorter_${name}
+ done
+fi
+
+# monophone training
+if [ $stage -le 8 ]; then
+ steps/train_mono.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter data/lang exp/mono
+ (
+ utils/mkgraph.sh data/lang \
+ exp/mono exp/mono/graph
+ )&
+
+ steps/align_si.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter data/lang exp/mono exp/mono_ali_train_shorter
+fi
+
+if [ $stage -le 9 ]; then
+ echo "$0: preparing for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/train_shorter data/train_shorter_sp
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 data/train_shorter_sp || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_sp || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_sp
+fi
+
+if [ $stage -le 10 ]; then
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter_sp data/lang exp/mono exp/mono_ali_train_shorter_sp || exit 1
+fi
+
+if [ $stage -le 11 ]; then
+ echo "$0: creating high-resolution MFCC features"
+ mfccdir=data/train_shorter_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in train_shorter_sp dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/train_shorter_sp_hires || exit 1;
+
+ for datadir in train_shorter_sp dev eval; do
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+fi
+
+combined_train_set=train_shorter_sp_combined
+aug_affix="reverb noise music babble"
+if [ $stage -le 12 ]; then
+ for name in $aug_affix; do
+ echo "$0: creating high-resolution MFCC features for train_shorter_${name}"
+ mfccdir=data/train_shorter_${name}_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ utils/copy_data_dir.sh data/train_shorter_${name} data/train_shorter_${name}_hires
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_${name}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_${name}_hires || exit 1;
+ done
+ eval utils/combine_data.sh data/${combined_train_set}_hires data/train_shorter_sp_hires \
+ data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}_hires
+fi
+
+
+if [ $stage -le 13 ]; then
+ local/chain/run_tdnn.sh --train-set train_shorter --combined-train-set ${combined_train_set}
+fi
+
+exit 0
+
diff --git a/egs/mobvoihotwords/v1/run_e2e.sh b/egs/mobvoihotwords/v1/run_e2e.sh
new file mode 100755
index 00000000000..540adc0cfb9
--- /dev/null
+++ b/egs/mobvoihotwords/v1/run_e2e.sh
@@ -0,0 +1,299 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+# The dataset is available at https://www.openslr.org/87/ and is downloaded by local/mobvoi_data_download.sh.
+
+# This recipe uses E2E LF-MMI training which doesn't require GMM training to obtain alignments.
+# Its performance is slightly better than those based on alignments (cross-entropy or regular LF-MMI)
+# on this dataset.
+
+stage=0
+
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -euo pipefail
+
+if [ $stage -le 0 ]; then
+ local/mobvoi_data_download.sh
+ echo "$0: Extracted all datasets into data/download/"
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Preparing datasets..."
+ wav_dir=data/download/mobvoi_hotword_dataset
+ for folder in train dev eval; do
+ mkdir -p data/$folder
+ for prefix in p n; do
+ mkdir -p data/${prefix}_$folder
+ json_path=data/download/mobvoi_hotword_dataset_resources/${prefix}_$folder.json
+ if [ $folder = "eval" ]; then
+ json_path=data/download/mobvoi_hotword_dataset_resources/${prefix}_test.json
+ fi
+ local/prepare_data.py $wav_dir $json_path data/${prefix}_$folder --non-wake-word "FREETEXT"
+ done
+ cat data/p_$folder/wav.scp data/n_$folder/wav.scp > data/$folder/wav.scp
+ cat data/p_$folder/text data/n_$folder/text > data/$folder/text
+ cat data/p_$folder/utt2spk data/n_$folder/utt2spk > data/$folder/utt2spk
+ rm -rf data/p_$folder data/n_$folder
+ done
+ echo "$0: text, utt2spk and wav.scp have been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: Extracting MFCC..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ utils/fix_data_dir.sh $dir
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 16 $dir
+ steps/compute_cmvn_stats.sh $dir
+ utils/fix_data_dir.sh $dir
+ utils/data/get_utt2dur.sh $dir
+ utils/validate_data_dir.sh $dir
+ done
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Preparing dictionary and lang..."
+ local/prepare_dict.sh
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.5 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" data/lang/temp data/lang
+fi
+
+if [ $stage -le 4 ]; then
+ id_sil=`cat data/lang/words.txt | grep "<sil>" | awk '{print $2}'`
+ id_freetext=`cat data/lang/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id_word0=`cat data/lang/words.txt | grep "HiXiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/words.txt | grep "NihaoWenwen" | awk '{print $2}'`
+ mkdir -p data/lang/lm
+ cat <<EOF > data/lang/lm/fst.txt
+0 1 $id_sil $id_sil
+0 4 $id_sil $id_sil 7.0
+1 4 $id_freetext $id_freetext 0.0
+4 0 $id_sil $id_sil
+1 2 $id_word0 $id_word0 2.3
+2 0 $id_sil $id_sil
+1 3 $id_word1 $id_word1 2.3
+3 0 $id_sil $id_sil
+0
+EOF
+ fstcompile data/lang/lm/fst.txt data/lang/G.fst
+ set +e
+ fstisstochastic data/lang/G.fst
+ set -e
+ utils/validate_lang.pl data/lang
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: subsegmenting for the training data..."
+ srcdir=data/train
+ utils/data/convert_data_dir_to_whole.sh $srcdir ${srcdir}_whole
+
+ utils/data/get_segments_for_data.sh $srcdir > ${srcdir}_whole/segments
+ utils/filter_scp.pl <(awk '{if ($2 == "FREETEXT") print $1}' ${srcdir}_whole/text) \
+ ${srcdir}_whole/segments >${srcdir}_whole/neg_segments
+ utils/filter_scp.pl --exclude ${srcdir}_whole/neg_segments ${srcdir}_whole/segments \
+ >${srcdir}_whole/pos_segments
+ utils/filter_scp.pl ${srcdir}_whole/pos_segments ${srcdir}_whole/utt2dur >${srcdir}_whole/pos_utt2dur
+ local/get_random_subsegments.py --overlap-duration=0.3 --max-remaining-duration=0.3 \
+ ${srcdir}_whole/neg_segments ${srcdir}_whole/pos_utt2dur | \
+ cat ${srcdir}_whole/pos_segments - | sort >${srcdir}_whole/sub_segments
+ utils/data/subsegment_data_dir.sh ${srcdir}_whole \
+ ${srcdir}_whole/sub_segments data/train_segmented
+ awk '{print $1,$2}' ${srcdir}_whole/sub_segments | \
+ utils/apply_map.pl -f 2 ${srcdir}_whole/text >data/train_segmented/text
+ utils/data/extract_wav_segments_data_dir.sh --nj 50 --cmd "$train_cmd" \
+ data/train_segmented data/train_shorter
+ steps/compute_cmvn_stats.sh data/train_shorter
+ utils/fix_data_dir.sh data/train_shorter
+ utils/validate_data_dir.sh data/train_shorter
+fi
+
+# In this section, we augment the training data with reverberation,
+# noise, music, and babble, and combined it with the clean data.
+if [ $stage -le 6 ]; then
+ utils/data/get_utt2dur.sh data/train_shorter
+ cp data/train_shorter/utt2dur data/train_shorter/reco2dur
+ # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+ [ ! -f rirs_noises.zip ] && wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+ [ ! -d "RIRS_NOISES" ] && unzip rirs_noises.zip
+
+ # Make a version with reverberated speech
+ rvb_opts=()
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+
+ # Make a reverberated version of the SWBD+SRE list. Note that we don't add any
+ # additive noise here.
+ python3 steps/data/reverberate_data_dir.py \
+ "${rvb_opts[@]}" \
+ --speech-rvb-probability 1 \
+ --prefix "rev" \
+ --pointsource-noise-addition-probability 0 \
+ --isotropic-noise-addition-probability 0 \
+ --num-replications 1 \
+ --source-sampling-rate 16000 \
+ data/train_shorter data/train_shorter_reverb
+ cat data/train_shorter/utt2dur | awk -v name=rev1 '{print name"-"$0}' >data/train_shorter_reverb/utt2dur
+
+ # Prepare the MUSAN corpus, which consists of music, speech, and noise
+ # suitable for augmentation.
+ steps/data/make_musan.sh /export/corpora/JHU/musan data
+
+ # Get the duration of the MUSAN recordings. This will be used by the
+ # script augment_data_dir.py.
+ for name in speech noise music; do
+ utils/data/get_utt2dur.sh data/musan_${name}
+ cp data/musan_${name}/utt2dur data/musan_${name}/reco2dur
+ done
+
+ # Augment with musan_noise
+ steps/data/augment_data_dir.py --utt-prefix "noise" --modify-spk-id true --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_shorter data/train_shorter_noise
+ # Augment with musan_music
+ steps/data/augment_data_dir.py --utt-prefix "music" --modify-spk-id true --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_shorter data/train_shorter_music
+ # Augment with musan_speech
+ steps/data/augment_data_dir.py --utt-prefix "babble" --modify-spk-id true --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_shorter data/train_shorter_babble
+fi
+
+if [ $stage -le 7 ]; then
+ # Now make MFCC features
+ for name in reverb noise music babble; do
+ steps/make_mfcc.sh --nj 16 --cmd "$train_cmd" \
+ data/train_shorter_${name} || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}
+ utils/fix_data_dir.sh data/train_shorter_${name}
+ utils/validate_data_dir.sh data/train_shorter_${name}
+ done
+fi
+
+combined_train_set=train_shorter_combined
+aug_affix="reverb noise music babble"
+if [ $stage -le 8 ]; then
+ eval utils/combine_data.sh data/${combined_train_set} data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}
+fi
+
+if [ -f data/${combined_train_set}_spe2e_hires/feats.scp ]; then
+ echo "$0: It seems that features for the perturbed training data already exist."
+ echo "If you want to extract them anyway, remove them first and run this"
+ echo "stage again. Skipping this stage..."
+else
+ if [ $stage -le 9 ]; then
+ echo "$0: perturbing the training data to allowed lengths..."
+ utils/data/get_utt2dur.sh data/${combined_train_set} # necessary for the next command
+
+ # 12 in the following command means the allowed lengths are spaced
+ # by 12% change in length.
+ utils/data/perturb_speed_to_allowed_lengths.py --speed-perturb false 12 data/${combined_train_set} \
+ data/${combined_train_set}_e2e_hires
+ cat data/${combined_train_set}_e2e_hires/utt2dur | \
+ awk '{print $1 " " substr($1,5)}' >data/${combined_train_set}_e2e_hires/utt2uniq.tmp
+ utils/apply_map.pl -f 2 data/${combined_train_set}/utt2uniq \
+ data/${combined_train_set}_e2e_hires/utt2uniq
+ rm -f data/${combined_train_set}_e2e_hires/utt2uniq.tmp 2>/dev/null || true
+ utils/fix_data_dir.sh data/${combined_train_set}_e2e_hires
+
+ utils/data/get_utt2dur.sh data/train_shorter # necessary for the next command
+ utils/data/perturb_speed_to_allowed_lengths.py 12 data/train_shorter data/train_shorter_spe2e_hires
+ cat data/train_shorter_spe2e_hires/utt2dur | \
+ awk '{print $1 " " substr($1,5)}' >data/train_shorter_spe2e_hires/utt2uniq
+ utils/fix_data_dir.sh data/train_shorter_spe2e_hires
+ utils/combine_data.sh data/${combined_train_set}_spe2e_hires data/${combined_train_set}_e2e_hires data/train_shorter_spe2e_hires
+ cat data/train_shorter_spe2e_hires/allowed_lengths.txt >data/${combined_train_set}_spe2e_hires/allowed_lengths.txt
+ fi
+
+ if [ $stage -le 10 ]; then
+ echo "$0: extracting MFCC features for the training data..."
+ mfccdir=data/${combined_train_set}_spe2e_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/snips-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" \
+ data/${combined_train_set}_spe2e_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${combined_train_set}_spe2e_hires || exit 1;
+ utils/fix_data_dir.sh data/${combined_train_set}_spe2e_hires
+ utils/validate_data_dir.sh data/${combined_train_set}_spe2e_hires
+ fi
+fi
+
+if [ $stage -le 11 ]; then
+ if [ -f data/eval_hires/feats.scp ]; then
+ echo "$0: It seems that features for the test sets already exist."
+ echo "skipping this stage..."
+ else
+ echo "$0: extracting MFCC features for the test sets"
+ for datadir in dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+ fi
+fi
+
+if [ $stage -le 12 ]; then
+ local/chain/run_e2e_tdnn.sh --train-set ${combined_train_set}_spe2e
+fi
+
+combined_train_set=train_shorter_sp_combined
+if [ -f data/${combined_train_set}_hires/feats.scp ]; then
+ echo "$0: It seems that features for the perturbed training data already exist."
+ echo "If you want to extract them anyway, remove them first and run this"
+ echo "stage again. Skipping this stage..."
+else
+ if [ $stage -le 13 ]; then
+ echo "$0: preparing for speed-perturbed data"
+ utils/data/perturb_data_dir_speed_3way.sh data/train_shorter data/train_shorter_sp_hires
+ echo "$0: creating high-resolution MFCC features for speed-perturbed data"
+ mfccdir=data/train_shorter_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/snips-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/train_shorter_sp_hires || exit 1;
+
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_sp_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_sp_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_sp_hires || exit 1;
+ fi
+
+ if [ $stage -le 14 ]; then
+ for name in $aug_affix; do
+ echo "$0: creating high-resolution MFCC features for train_shorter_${name}"
+ mfccdir=data/train_shorter_${name}_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/snips-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ utils/copy_data_dir.sh data/train_shorter_${name} data/train_shorter_${name}_hires
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_${name}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_${name}_hires || exit 1;
+ done
+ eval utils/combine_data.sh data/${combined_train_set}_hires data/train_shorter_sp_hires \
+ data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}_hires
+ fi
+fi
+
+if [ $stage -le 15 ]; then
+ echo "$0: Aligning the training data using the e2e chain model..."
+ steps/nnet3/align.sh --nj 50 --cmd "$train_cmd" \
+ --use-gpu false \
+ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \
+ data/${combined_train_set}_hires data/lang exp/chain/e2e_tdnn_1a exp/chain/e2e_ali_${combined_train_set}
+fi
+
+if [ $stage -le 16 ]; then
+ echo "$0: Building a tree and training a regular chain model using the e2e alignments..."
+ local/chain/run_tdnn_e2eali.sh --train-set ${combined_train_set} --e2echain-model-dir exp/chain/e2e_tdnn_1a
+fi
+
+exit 0
diff --git a/egs/mobvoihotwords/v1/steps b/egs/mobvoihotwords/v1/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/mobvoihotwords/v1/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/utils b/egs/mobvoihotwords/v1/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/mobvoihotwords/v1/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/multi_cn/s5/README.md b/egs/multi_cn/s5/README.md
index 4cfcb8d6941..fed933512b9 100644
--- a/egs/multi_cn/s5/README.md
+++ b/egs/multi_cn/s5/README.md
@@ -5,6 +5,7 @@ This is a Chinese speech recognition recipe that trains on all Chinese corpora o
* Primewords (99 hours)
* ST-CMDS (110 hours)
* THCHS-30 (26 hours)
+* optional AISHELL2 (~1000 hours) if available
This recipe was developed by Xingyu Na (Microsoft Corporation) and Hui Bu (AISHELL Foundation).
diff --git a/egs/multi_cn/s5/RESULTS b/egs/multi_cn/s5/RESULTS
index 16b50c61cdb..0b9f652a2ff 100644
--- a/egs/multi_cn/s5/RESULTS
+++ b/egs/multi_cn/s5/RESULTS
@@ -6,8 +6,8 @@
%WER 19.03 [ 19941 / 104765, 725 ins, 1222 del, 17994 sub ] exp/tri3a/decode_aishell_test_tg/cer_13_0.5
%WER 21.68 [ 22710 / 104765, 902 ins, 2361 del, 19447 sub ] exp/tri4a/decode_aishell_test_tg/cer_14_0.0
%WER 16.64 [ 17436 / 104765, 857 ins, 706 del, 15873 sub ] exp/tri4a_cleaned/decode_aishell_test_tg/cer_14_0.5
-%WER 6.01 [ 6299 / 104765, 129 ins, 175 del, 5995 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_aishell_tg/cer_11_1.0
-%WER 6.01 [ 6298 / 104765, 128 ins, 176 del, 5994 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_aishell_tg/cer_11_1.0
+%WER 5.90 [ 6176 / 104765, 119 ins, 169 del, 5888 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_aishell_tg/cer_11_1.0
+%WER 5.90 [ 6177 / 104765, 121 ins, 168 del, 5888 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_aishell_tg/cer_11_1.0
# aidatatang test set results
%WER 33.86 [ 158799 / 468933, 3856 ins, 33811 del, 121132 sub ] exp/tri1b/decode_aidatatang_test_tg/cer_14_0.0
@@ -15,8 +15,8 @@
%WER 23.67 [ 111009 / 468933, 4535 ins, 19118 del, 87356 sub ] exp/tri3a/decode_aidatatang_test_tg/cer_14_0.0
%WER 20.01 [ 93829 / 468933, 4563 ins, 16970 del, 72296 sub ] exp/tri4a/decode_aidatatang_test_tg/cer_15_0.0
%WER 17.85 [ 83717 / 468933, 6506 ins, 13716 del, 63495 sub ] exp/tri4a_cleaned/decode_aidatatang_test_tg/cer_15_0.0
-%WER 4.99 [ 23403 / 468933, 1954 ins, 3371 del, 18078 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_aidatatang_tg/cer_11_0.0
-%WER 4.99 [ 23385 / 468933, 1965 ins, 3356 del, 18064 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_aidatatang_tg/cer_11_0.0
+%WER 4.98 [ 23370 / 468933, 2190 ins, 3188 del, 17992 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_aidatatang_tg/cer_10_0.0
+%WER 4.98 [ 23371 / 468933, 2224 ins, 3171 del, 17976 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_aidatatang_tg/cer_10_0.0
# magicdata test set results
%WER 27.01 [ 64815 / 239927, 4838 ins, 14852 del, 45125 sub ] exp/tri1b/decode_magicdata_test_tg/cer_17_0.0
@@ -24,8 +24,8 @@
%WER 22.42 [ 53784 / 239927, 6513 ins, 7409 del, 39862 sub ] exp/tri3a/decode_magicdata_test_tg/cer_17_0.0
%WER 15.45 [ 37076 / 239927, 3942 ins, 5217 del, 27917 sub ] exp/tri4a/decode_magicdata_test_tg/cer_17_0.0
%WER 13.99 [ 33568 / 239927, 6267 ins, 3705 del, 23596 sub ] exp/tri4a_cleaned/decode_magicdata_test_tg/cer_17_0.5
-%WER 4.21 [ 10112 / 239927, 1443 ins, 1927 del, 6742 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_magicdata_tg/cer_11_0.5
-%WER 4.23 [ 10158 / 239927, 1299 ins, 2032 del, 6827 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_magicdata_tg/cer_11_1.0
+%WER 4.24 [ 10180 / 239927, 1405 ins, 2001 del, 6774 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_magicdata_tg/cer_11_1.0
+%WER 4.25 [ 10188 / 239927, 1428 ins, 1997 del, 6763 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_magicdata_tg/cer_11_1.0
# thchs test set results
%WER 35.75 [ 29005 / 81139, 353 ins, 1824 del, 26828 sub ] exp/tri1b/decode_thchs_test_tg/cer_10_1.0
@@ -33,8 +33,8 @@
%WER 30.26 [ 24549 / 81139, 328 ins, 1412 del, 22809 sub ] exp/tri3a/decode_thchs_test_tg/cer_10_1.0
%WER 27.67 [ 22449 / 81139, 410 ins, 1102 del, 20937 sub ] exp/tri4a/decode_thchs_test_tg/cer_10_0.5
%WER 25.41 [ 20615 / 81139, 399 ins, 847 del, 19369 sub ] exp/tri4a_cleaned/decode_thchs_test_tg/cer_11_0.5
-%WER 13.02 [ 10561 / 81139, 134 ins, 261 del, 10166 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_thchs_tg/cer_9_1.0
-%WER 13.00 [ 10552 / 81139, 132 ins, 259 del, 10161 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_thchs_tg/cer_9_1.0
+%WER 12.96 [ 10514 / 81139, 120 ins, 300 del, 10094 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_thchs_tg/cer_10_1.0
+%WER 12.94 [ 10499 / 81139, 120 ins, 299 del, 10080 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_thchs_tg/cer_10_1.0
# GMM results w/ corpus LM
# ./run.sh --stage 17 --corpus-lm true
diff --git a/egs/multi_cn/s5/local/aidatatang_data_prep.sh b/egs/multi_cn/s5/local/aidatatang_data_prep.sh
index 518a0e99866..93898338722 100755
--- a/egs/multi_cn/s5/local/aidatatang_data_prep.sh
+++ b/egs/multi_cn/s5/local/aidatatang_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/aidatatang_download_and_untar.sh b/egs/multi_cn/s5/local/aidatatang_download_and_untar.sh
index a2616ba0e20..2cbf88f8190 100755
--- a/egs/multi_cn/s5/local/aidatatang_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/aidatatang_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/multi_cn/s5/local/aishell2_data_prep.sh b/egs/multi_cn/s5/local/aishell2_data_prep.sh
new file mode 100755
index 00000000000..d281083cb04
--- /dev/null
+++ b/egs/multi_cn/s5/local/aishell2_data_prep.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
+# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
+# Apache 2.0
+
+# This script is copied from aishell2/s5/local/prepare_data.sh
+# but using difference word segmentation script.
+
+# transform raw AISHELL-2 data to kaldi format
+
+. ./path.sh || exit 1;
+
+tmp=
+dir=
+
+if [ $# != 2 ]; then
+ echo "Usage: $0 "
+ echo " $0 /export/AISHELL-2/iOS/train data/train"
+ exit 1;
+fi
+
+corpus=$1
+dir=$2/train
+tmp=$2/tmp
+
+echo "prepare_data.sh: Preparing data in $corpus"
+
+mkdir -p $tmp
+mkdir -p $dir
+
+# corpus check
+if [ ! -d $corpus ] || [ ! -f $corpus/wav.scp ] || [ ! -f $corpus/trans.txt ]; then
+ echo "Error: $0 requires wav.scp and trans.txt under $corpus directory."
+ exit 1;
+fi
+
+# validate utt-key list
+awk '{print $1}' $corpus/wav.scp > $tmp/wav_utt.list
+awk '{print $1}' $corpus/trans.txt > $tmp/trans_utt.list
+utils/filter_scp.pl -f 1 $tmp/wav_utt.list $tmp/trans_utt.list > $tmp/utt.list
+
+# wav.scp
+awk -F'\t' -v path_prefix=$corpus '{printf("%s\t%s/%s\n",$1,path_prefix,$2)}' $corpus/wav.scp > $tmp/tmp_wav.scp
+utils/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_wav.scp | sort -k 1 | uniq > $tmp/wav.scp
+
+# text
+dos2unix < $corpus/trans.txt | \
+ utils/filter_scp.pl -f 1 $tmp/utt.list - | \
+ sort -k 1 | uniq | tr '[a-z]' '[A-Z]' | \
+ local/word_segment.py > $tmp/text
+
+# utt2spk & spk2utt
+awk -F'\t' '{print $2}' $tmp/wav.scp > $tmp/wav.list
+sed -e 's:\.wav::g' $tmp/wav.list | \
+ awk -F'/' '{i=NF-1;printf("%s\t%s\n",$NF,$i)}' > $tmp/tmp_utt2spk
+utils/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_utt2spk | sort -k 1 | uniq > $tmp/utt2spk
+utils/utt2spk_to_spk2utt.pl $tmp/utt2spk | sort -k 1 | uniq > $tmp/spk2utt
+
+# copy prepared resources from tmp_dir to target dir
+mkdir -p $dir
+for f in wav.scp text spk2utt utt2spk; do
+ cp $tmp/$f $dir/$f || exit 1;
+done
+
+utils/data/validate_data_dir.sh --no-feats $dir || exit 1;
+echo "local/prepare_data.sh succeeded"
+exit 0;
diff --git a/egs/multi_cn/s5/local/aishell_data_prep.sh b/egs/multi_cn/s5/local/aishell_data_prep.sh
index 7896e208f33..25f75fc1ae2 100755
--- a/egs/multi_cn/s5/local/aishell_data_prep.sh
+++ b/egs/multi_cn/s5/local/aishell_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/aishell_download_and_untar.sh b/egs/multi_cn/s5/local/aishell_download_and_untar.sh
index e251a9aae2f..74a8e36cf2a 100755
--- a/egs/multi_cn/s5/local/aishell_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/aishell_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/multi_cn/s5/local/chain/compare_cer.sh b/egs/multi_cn/s5/local/chain/compare_cer.sh
new file mode 100755
index 00000000000..3daa43834a6
--- /dev/null
+++ b/egs/multi_cn/s5/local/chain/compare_cer.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+
+# This script is modified from egs/librispeech/s5/local/chain/compare_wer.sh
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_cer.sh exp/chain/tdnn_{c,d}_sp
+# For use with discriminatively trained systems you specify the epochs after a colon:
+# for instance,
+# local/chain/compare_cer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}
+
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: [--online] [ ... ]"
+ echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
+ echo "or (with epoch numbers for discriminative training):"
+ echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
+ exit 1
+fi
+
+echo "# $0 $*"
+
+include_online=false
+if [ "$1" == "--online" ]; then
+ include_online=true
+ shift
+fi
+
+
+used_epochs=false
+
+# this function set_names is used to separate the epoch-related parts of the name
+# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like:
+# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
+# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
+# If called with something like:
+# set_names exp/chain/tdnn_d_sp_smbr:3
+# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"
+
+
+set_names() {
+ if [ $# != 1 ]; then
+ echo "compare_cer.sh: internal error"
+ exit 1 # exit the program
+ fi
+ dirname=$(echo $1 | cut -d: -f1)
+ epoch=$(echo $1 | cut -s -d: -f2)
+ if [ -z $epoch ]; then
+ epoch_infix=""
+ else
+ used_epochs=true
+ epoch_infix=_epoch${epoch}
+ fi
+}
+
+
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+strings=(
+ "# CER on aidatatang(tg) "
+ "# CER on aishell(tg) "
+ "# CER on magicdata(tg) "
+ "# CER on thchs30(tg) ")
+
+for n in 0 1 2 3; do
+ echo -n "${strings[$n]}"
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ decode_names=(aidatatang_tg aishell_tg magicdata_tg thchs_tg)
+
+ wer=$(grep WER $dirname/decode_${decode_names[$n]}/cer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ if $include_online; then
+ echo -n "# [online:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(grep WER ${dirname}_online/decode_${decode_names[$n]}/cer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+done
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (xent) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (xent) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Num-parameters "
+for x in $*; do
+ num_params=$(grep num-parameters $x/log/progress.1.log | awk '{print $2}')
+ printf "% 10d" $num_params
+done
+echo
diff --git a/egs/multi_cn/s5/local/chain/run_chain_common.sh b/egs/multi_cn/s5/local/chain/run_chain_common.sh
index 2f57c4765cf..4b00784f32a 100755
--- a/egs/multi_cn/s5/local/chain/run_chain_common.sh
+++ b/egs/multi_cn/s5/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
diff --git a/egs/multi_cn/s5/local/chain/run_ivector_common.sh b/egs/multi_cn/s5/local/chain/run_ivector_common.sh
index 5a09d44a79b..b52b6945a40 100755
--- a/egs/multi_cn/s5/local/chain/run_ivector_common.sh
+++ b/egs/multi_cn/s5/local/chain/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
@@ -75,29 +75,22 @@ if [ $stage -le 3 ]; then
# do volume-perturbation on the training data prior to extracting hires
# features; this helps make trained nnets more invariant to test data volume.
- # create MFCC data dir without pitch to extract iVector
utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires
- steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+ steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/${train_set}_sp_hires || exit 1;
steps/compute_cmvn_stats.sh data/${train_set}_sp_hires || exit 1;
utils/fix_data_dir.sh data/${train_set}_sp_hires
- utils/data/limit_feature_dim.sh 0:39 \
- data/${train_set}_sp_hires data/${train_set}_sp_hires_nopitch || exit 1;
- steps/compute_cmvn_stats.sh data/${train_set}_sp_hires_nopitch || exit 1;
for datadir in $test_sets; do
- steps/make_mfcc_pitch_online.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
+ steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/$datadir/test_hires || exit 1;
steps/compute_cmvn_stats.sh data/$datadir/test_hires || exit 1;
utils/fix_data_dir.sh data/$datadir/test_hires
- utils/data/limit_feature_dim.sh 0:39 \
- data/$datadir/test_hires data/$datadir/test_hires_nopitch || exit 1;
- steps/compute_cmvn_stats.sh data/$datadir/test_hires_nopitch || exit 1;
done
# now create a data subset. 60k is 1/5th of the training dataset (around 200 hours).
- utils/subset_data_dir.sh data/${train_set}_sp_hires_nopitch 60000 \
- data/${train_set}_sp_hires_nopitch_60k
+ utils/subset_data_dir.sh data/${train_set}_sp_hires 60000 \
+ data/${train_set}_sp_hires_60k
fi
@@ -107,16 +100,16 @@ if [ $stage -le 4 ]; then
mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
- num_utts_total=$(wc -l $dir/configs/network.xconfig
input dim=100 name=ivector
- input dim=43 name=input
+ input dim=40 name=input
# MFCC to filterbank
idct-layer name=idct input=input dim=40 cepstral-lifter=22 affine-transform-file=$dir/configs/idct.mat
@@ -237,7 +232,7 @@ if $test_online_decoding && [ $stage -le 18 ]; then
# note: if the features change (e.g. you add pitch features), you will have to
# change the options of the following command line.
steps/online/nnet3/prepare_online_decoding.sh \
- --mfcc-config conf/mfcc_hires.conf --add-pitch true \
+ --mfcc-config conf/mfcc_hires.conf \
$lang exp/nnet3${nnet3_affix}/extractor $dir ${dir}_online
rm $dir/.error 2>/dev/null || true
diff --git a/egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1b.sh b/egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
index e3b8fa71175..dd9a238548a 100755
--- a/egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
+++ b/egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
@@ -1,23 +1,10 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is copied from mini_librispeech/s5
# 1b is as 1a but adding SpecAugment and removing dropout (which, in
# combination with SpecAugment, no longer seemed to give an improvement).
-# local/chain/compare_wer.sh --online exp/chain/cnn_tdnn1{a,a2,b,b2}_sp
-# System cnn_tdnn1a_sp cnn_tdnn1a2_sp cnn_tdnn1b_sp cnn_tdnn1b2_sp
-#WER dev_clean_2 (tgsmall) 10.89 10.96 10.04 9.93
-# [online:] 10.91 10.93 9.99 9.99
-#WER dev_clean_2 (tglarge) 7.50 7.80 6.94 6.89
-# [online:] 7.58 7.84 6.97 7.04
-# Final train prob -0.0476 -0.0470 -0.0577 -0.0575
-# Final valid prob -0.0754 -0.0760 -0.0742 -0.0746
-# Final train prob (xent) -1.0930 -1.0995 -1.3090 -1.3043
-# Final valid prob (xent) -1.2916 -1.2904 -1.4242 -1.4225
-# Num-params 4492816 4492816 4492816 4492816
-
-
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
@@ -26,7 +13,6 @@ set -euo pipefail
stage=0
decode_nj=10
train_set=train_all_cleaned
-test_sets=""
gmm=tri4a_cleaned
nnet3_affix=_cleaned
@@ -50,6 +36,7 @@ remove_egs=true
reporting_email=
# decode options
+test_sets=""
test_online_decoding=true # if true, it will run the last decoding stage.
@@ -120,7 +107,7 @@ if [ $stage -le 14 ]; then
mkdir -p $dir/configs
cat < $dir/configs/network.xconfig
input dim=100 name=ivector
- input dim=43 name=input
+ input dim=40 name=input
# this takes the MFCCs and generates filterbank coefficients. The MFCCs
# are more compressible so we prefer to dump the MFCCs to disk rather
@@ -236,7 +223,7 @@ if $test_online_decoding && [ $stage -le 18 ]; then
# note: if the features change (e.g. you add pitch features), you will have to
# change the options of the following command line.
steps/online/nnet3/prepare_online_decoding.sh \
- --mfcc-config conf/mfcc_hires.conf --add-pitch true \
+ --mfcc-config conf/mfcc_hires.conf \
$lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online
rm $dir/.error 2>/dev/null || true
diff --git a/egs/multi_cn/s5/local/magicdata_data_prep.sh b/egs/multi_cn/s5/local/magicdata_data_prep.sh
index f8d47716751..4c96a40c9b6 100755
--- a/egs/multi_cn/s5/local/magicdata_data_prep.sh
+++ b/egs/multi_cn/s5/local/magicdata_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/magicdata_download_and_untar.sh b/egs/multi_cn/s5/local/magicdata_download_and_untar.sh
index df8ca8d2296..c322edc98cf 100755
--- a/egs/multi_cn/s5/local/magicdata_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/magicdata_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2019 Xingyu Na
diff --git a/egs/multi_cn/s5/local/prepare_dict.sh b/egs/multi_cn/s5/local/prepare_dict.sh
index 6b160b60580..3a86e160cf1 100755
--- a/egs/multi_cn/s5/local/prepare_dict.sh
+++ b/egs/multi_cn/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is copied from egs/hkust/s5/local/hkust_prepare_dict.sh
@@ -316,7 +316,7 @@ cat $dict_dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ",
# Add to the lexicon the silences, noises etc.
(echo '!SIL SIL'; echo '[SPK] SPN'; echo '[FIL] NSN'; echo ' SPN' ) | \
- cat - $dict_dir/lexicon1.txt > $dict_dir/lexicon.txt || exit 1;
+ cat - $dict_dir/lexicon1.txt | sed '/^HH$/d' > $dict_dir/lexicon.txt || exit 1;
echo "$0: dict preparation succeeded"
exit 0;
diff --git a/egs/multi_cn/s5/local/primewords_data_prep.sh b/egs/multi_cn/s5/local/primewords_data_prep.sh
index bcf3b6698a4..9ce4140f801 100755
--- a/egs/multi_cn/s5/local/primewords_data_prep.sh
+++ b/egs/multi_cn/s5/local/primewords_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/primewords_download_and_untar.sh b/egs/multi_cn/s5/local/primewords_download_and_untar.sh
index 7e716c7a0a6..5828f1e2d7e 100755
--- a/egs/multi_cn/s5/local/primewords_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/primewords_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/multi_cn/s5/local/run_cleanup_segmentation.sh b/egs/multi_cn/s5/local/run_cleanup_segmentation.sh
index f1ea4a2f574..ea93ab97386 100755
--- a/egs/multi_cn/s5/local/run_cleanup_segmentation.sh
+++ b/egs/multi_cn/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Yiming Wang
diff --git a/egs/multi_cn/s5/local/score.sh b/egs/multi_cn/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/multi_cn/s5/local/score.sh
+++ b/egs/multi_cn/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/multi_cn/s5/local/stcmds_data_prep.sh b/egs/multi_cn/s5/local/stcmds_data_prep.sh
index 6375d0d9a1b..a751729ef91 100755
--- a/egs/multi_cn/s5/local/stcmds_data_prep.sh
+++ b/egs/multi_cn/s5/local/stcmds_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/stcmds_download_and_untar.sh b/egs/multi_cn/s5/local/stcmds_download_and_untar.sh
index ca89b5a292a..37379ab28e6 100755
--- a/egs/multi_cn/s5/local/stcmds_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/stcmds_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/multi_cn/s5/local/thchs-30_data_prep.sh b/egs/multi_cn/s5/local/thchs-30_data_prep.sh
index 8f48133a1dd..2d3af9fdaba 100755
--- a/egs/multi_cn/s5/local/thchs-30_data_prep.sh
+++ b/egs/multi_cn/s5/local/thchs-30_data_prep.sh
@@ -1,8 +1,8 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Tsinghua University (Author: Dong Wang, Xuewei Zhang). Apache 2.0.
# 2016 LeSpeech (Author: Xingyu Na)
-#This script pepares the data directory for thchs30 recipe.
+#This script pepares the data directory for thchs30 recipe.
#It reads the corpus and get wav.scp and transcriptions.
corpus_dir=$1
@@ -23,13 +23,13 @@ for x in train dev test; do
spkid=`echo $nn | awk -F"_" '{print "" $1}'`
spk_char=`echo $spkid | sed 's/\([A-Z]\).*/\1/'`
spk_num=`echo $spkid | sed 's/[A-Z]\([0-9]\)/\1/'`
- spkid=$(printf '%s%.2d' "$spk_char" "$spk_num")
+ spkid=$(printf 'TH%s%.2d' "$spk_char" "$spk_num")
utt_num=`echo $nn | awk -F"_" '{print $2}'`
- uttid=$(printf '%s%.2d_%.3d' "$spk_char" "$spk_num" "$utt_num")
+ uttid=$(printf 'TH%s%.2d-%.3d' "$spk_char" "$spk_num" "$utt_num")
echo $uttid $corpus_dir/$x/$nn.wav >> $part/wav.scp
echo $uttid $spkid >> $part/utt2spk
echo $uttid `sed -n 1p $corpus_dir/data/$nn.wav.trn` | sed 's/ l =//' >> $part/text
- done
+ done
sort $part/wav.scp -o $part/wav.scp
sort $part/utt2spk -o $part/utt2spk
sort $part/text -o $part/text
@@ -40,6 +40,3 @@ done
utils/data/validate_data_dir.sh --no-feats $data/train || exit 1;
utils/data/validate_data_dir.sh --no-feats $data/dev || exit 1;
utils/data/validate_data_dir.sh --no-feats $data/test || exit 1;
-
-
-
diff --git a/egs/multi_cn/s5/local/thchs_download_and_untar.sh b/egs/multi_cn/s5/local/thchs_download_and_untar.sh
index 6294fca7d9b..878e29a80ed 100755
--- a/egs/multi_cn/s5/local/thchs_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/thchs_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# Copyright 2016 Tsinghua University (author: Dong Wang)
diff --git a/egs/multi_cn/s5/local/train_corpus_lm.sh b/egs/multi_cn/s5/local/train_corpus_lm.sh
index 181ff4c5522..fe464d41520 100755
--- a/egs/multi_cn/s5/local/train_corpus_lm.sh
+++ b/egs/multi_cn/s5/local/train_corpus_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
@@ -42,13 +42,13 @@ cat $text | awk '{$1=""; print substr($0, 2)}' | awk -v lex=$lexicon 'BEGIN{whil
{for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \
> $cleantext || exit 1;
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | sort | uniq -c | \
sort -nr > $dir/word.counts || exit 1;
# Get counts from acoustic training transcripts, and add one-count
# for each word in the lexicon (but not silence, we don't want it
# in the LM-- we'll add it optionally later).
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | \
cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
@@ -58,7 +58,7 @@ cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "0)map[$1]=$2;}
- { for(n=2;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \
+ { for(n=1;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \
|| exit 1;
if [ `wc -l < $cleantext` -le 10000 ]; then
diff --git a/egs/multi_cn/s5/local/train_lms.sh b/egs/multi_cn/s5/local/train_lms.sh
index ac632538ec5..c0dfc46a298 100755
--- a/egs/multi_cn/s5/local/train_lms.sh
+++ b/egs/multi_cn/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
@@ -29,17 +29,18 @@ fi
cleantext=$dir/text.no_oov
+# note: ignore 1st field of text, it's the utterance-id.
cat $text | awk '{$1=""; print substr($0, 2)}' | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } }
{for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \
> $cleantext || exit 1;
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | sort | uniq -c | \
sort -nr > $dir/word.counts || exit 1;
# Get counts from acoustic training transcripts, and add one-count
# for each word in the lexicon (but not silence, we don't want it
# in the LM-- we'll add it optionally later).
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | \
cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
@@ -47,9 +48,8 @@ cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map \
|| exit 1;
-# note: ignore 1st field of train.txt, it's the utterance-id.
cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline0)map[$1]=$2;}
- { for(n=2;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \
+ { for(n=1;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \
|| exit 1;
train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1;
diff --git a/egs/multi_cn/s5/run.sh b/egs/multi_cn/s5/run.sh
index bd03355ea61..3fb48e72263 100755
--- a/egs/multi_cn/s5/run.sh
+++ b/egs/multi_cn/s5/run.sh
@@ -1,13 +1,14 @@
-#!/bin/bash
+#!/usr/bin/env bash
-# Copyright 2019 Microsoft Corporation (authors: Xingyu Na)
+# Copyright 2019-2020 Microsoft Corporation (authors: Xingyu Na)
# Apache 2.0
. ./cmd.sh
. ./path.sh
stage=0
-dbase=/mnt/data/openslr
+dbase=/mnt/data/openslr # it is recommanded practice to provide absolute path here,
+ # otherwise some data downloading scripts might break.
aidatatang_url=www.openslr.org/resources/62
aishell_url=www.openslr.org/resources/33
magicdata_url=www.openslr.org/resources/68
@@ -18,6 +19,9 @@ thchs_url=www.openslr.org/resources/18
test_sets="aishell aidatatang magicdata thchs"
corpus_lm=false # interpolate with corpus lm
+has_aishell2=false # AISHELL2 train set is not publically downloadable
+ # with this option true, the script assumes you have it in $dbase
+
. utils/parse_options.sh
if [ $stage -le 0 ]; then
@@ -42,12 +46,21 @@ if [ $stage -le 1 ]; then
local/magicdata_data_prep.sh $dbase/magicdata data/magicdata || exit 1;
local/primewords_data_prep.sh $dbase/primewords data/primewords || exit 1;
local/stcmds_data_prep.sh $dbase/stcmds data/stcmds || exit 1;
+ if $has_aishell2; then
+ local/aishell2_data_prep.sh $dbase/aishell2/iOS/data data/aishell2 || exit 1;
+ fi
fi
if [ $stage -le 2 ]; then
# normalize transcripts
utils/combine_data.sh data/train_combined \
data/{aidatatang,aishell,magicdata,primewords,stcmds,thchs}/train || exit 1;
+ if $has_aishell2; then
+ mv data/train_combined data/train_combined_tmp
+ utils/combine_data.sh data/train_combined \
+ data/train_combined_tmp data/aishell2/train || exit 1;
+ rm -rf data/train_combined_tmp
+ fi
utils/combine_data.sh data/test_combined \
data/{aidatatang,aishell,magicdata,thchs}/{dev,test} || exit 1;
local/prepare_dict.sh || exit 1;
@@ -89,6 +102,12 @@ if [ $stage -le 5 ]; then
) &
done
wait
+ if $has_aishell2; then
+ steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 20 \
+ data/aishell2/train exp/make_mfcc/aishell2/train $mfccdir/aishell2 || exit 1;
+ steps/compute_cmvn_stats.sh data/aishell2/train \
+ exp/make_mfcc/aishell2/train $mfccdir/aishell2 || exit 1;
+ fi
fi
if [ $stage -le 6 ]; then
@@ -205,6 +224,12 @@ if [ $stage -le 14 ]; then
# train tri4a using all
utils/combine_data.sh data/train_all \
data/{aidatatang,aishell,magicdata,primewords,stcmds,thchs}/train || exit 1;
+ if $has_aishell2; then
+ mv data/train_all data/train_all_tmp
+ utils/combine_data.sh data/train_all \
+ data/train_all_tmp data/aishell2/train || exit 1;
+ rm -rf data/train_all_tmp
+ fi
steps/align_fmllr.sh --cmd "$train_cmd" --nj 100 \
data/train_all data/lang exp/tri3a exp/tri3a_ali || exit 1;
diff --git a/egs/multi_en/s5/local/ami_ihm_data_prep.sh b/egs/multi_en/s5/local/ami_ihm_data_prep.sh
index 55f8bb22d41..7c202438e6f 100755
--- a/egs/multi_en/s5/local/ami_ihm_data_prep.sh
+++ b/egs/multi_en/s5/local/ami_ihm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/ami/s5/local/ami_ihm_data_prep.sh
@@ -78,7 +78,7 @@ sed -e 's?.*/??' -e 's?.wav??' $dir/wav.flist | \
awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp
#replace path with an appropriate sox command that select single channel only
-awk '{print $1" sox -c 1 -t wavpcm -s "$2" -r 8000 -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp
+awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -r 8000 -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp
# (1d) reco2file_and_channel
cat $dir/wav.scp \
diff --git a/egs/multi_en/s5/local/ami_sdm_data_prep.sh b/egs/multi_en/s5/local/ami_sdm_data_prep.sh
index a5d55640d1e..282887b0341 100755
--- a/egs/multi_en/s5/local/ami_sdm_data_prep.sh
+++ b/egs/multi_en/s5/local/ami_sdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/ami/s5/local/ami_sdm_data_prep.sh
diff --git a/egs/multi_en/s5/local/ami_text_prep.sh b/egs/multi_en/s5/local/ami_text_prep.sh
index fb769a0c019..72e8e770e50 100755
--- a/egs/multi_en/s5/local/ami_text_prep.sh
+++ b/egs/multi_en/s5/local/ami_text_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/ami/s5/local/ami_text_prep.sh
diff --git a/egs/multi_en/s5/local/ami_xml2text.sh b/egs/multi_en/s5/local/ami_xml2text.sh
index 49ce740d44f..fd8b496dab6 100755
--- a/egs/multi_en/s5/local/ami_xml2text.sh
+++ b/egs/multi_en/s5/local/ami_xml2text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/ami/s5/local/ami_xml2text.sh
diff --git a/egs/multi_en/s5/local/chain/run_blstm_6h.sh b/egs/multi_en/s5/local/chain/run_blstm_6h.sh
index 126d29350a1..8840b3b188e 100644
--- a/egs/multi_en/s5/local/chain/run_blstm_6h.sh
+++ b/egs/multi_en/s5/local/chain/run_blstm_6h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh.
diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh
index 96f5fdac8f3..40979b4fd5b 100755
--- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh
+++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Xiaohui Zhang
# 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index 62266334962..8a6371d2f44 100755
--- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# 2018 Xiaohui Zhang
# 2018 Vimal Manohar
@@ -107,7 +107,7 @@ lang=data/${multi}/lang_${gmm}_chain
lang_dir=data/lang_${multi}_${gmm}_fsh_sw1_tg
rescore_lang_dir=data/lang_${multi}_${gmm}_fsh_sw1_fg
-local/nnet3/run_ivector_common.sh --stage $stage --nnet3-affix "$nnet3_affix" \
+local/nnet3/run_ivector_common.sh --stage $stage \
--multi $multi \
--gmm $gmm \
--speed-perturb $speed_perturb || exit 1
diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh
index 79cd3eb3014..3ac4078d507 100755
--- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh
+++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Xiaohui Zhang
# 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh
index a7170af9431..6e705fa8724 100755
--- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh
+++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Xiaohui Zhang
# 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/multi_en/s5/local/cmu_tedlium_prepare_dict.sh b/egs/multi_en/s5/local/cmu_tedlium_prepare_dict.sh
index 972ce491326..16fa98ded9d 100755
--- a/egs/multi_en/s5/local/cmu_tedlium_prepare_dict.sh
+++ b/egs/multi_en/s5/local/cmu_tedlium_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# 2017 Xiaohui Zhang
diff --git a/egs/multi_en/s5/local/eval2000_data_prep.sh b/egs/multi_en/s5/local/eval2000_data_prep.sh
index cf6b6a78580..68bd015d8b6 100755
--- a/egs/multi_en/s5/local/eval2000_data_prep.sh
+++ b/egs/multi_en/s5/local/eval2000_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/eval2000_data_prep.sh
diff --git a/egs/multi_en/s5/local/fisher_data_prep.sh b/egs/multi_en/s5/local/fisher_data_prep.sh
index cf46954575c..174d86f9a17 100755
--- a/egs/multi_en/s5/local/fisher_data_prep.sh
+++ b/egs/multi_en/s5/local/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/fisher_data_prep.sh
diff --git a/egs/multi_en/s5/local/hub4_96_data_prep.sh b/egs/multi_en/s5/local/hub4_96_data_prep.sh
index f258ea7b7f5..144f88cffca 100755
--- a/egs/multi_en/s5/local/hub4_96_data_prep.sh
+++ b/egs/multi_en/s5/local/hub4_96_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh
@@ -6,7 +6,7 @@
# Changes in lower level script/dir names were made
###########################################################################################
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# 2017 Vimal Manohar
# License: Apache 2.0
diff --git a/egs/multi_en/s5/local/hub4_97_data_prep.sh b/egs/multi_en/s5/local/hub4_97_data_prep.sh
index 096c2142c36..86b9482e4c4 100755
--- a/egs/multi_en/s5/local/hub4_97_data_prep.sh
+++ b/egs/multi_en/s5/local/hub4_97_data_prep.sh
@@ -4,7 +4,7 @@
# No change was made
###########################################################################################
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# 2017 Vimal Manohar
# License: Apache 2.0
diff --git a/egs/multi_en/s5/local/hub4_en_data_prep.sh b/egs/multi_en/s5/local/hub4_en_data_prep.sh
index e8173111038..6034e012701 100755
--- a/egs/multi_en/s5/local/hub4_en_data_prep.sh
+++ b/egs/multi_en/s5/local/hub4_en_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1996/1997 English Broadcast News training data preparation (HUB4)
diff --git a/egs/multi_en/s5/local/librispeech_data_prep.sh b/egs/multi_en/s5/local/librispeech_data_prep.sh
index b34072a4f61..9512fe70b65 100755
--- a/egs/multi_en/s5/local/librispeech_data_prep.sh
+++ b/egs/multi_en/s5/local/librispeech_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/librispeech/s5/local/data_prep.sh
diff --git a/egs/multi_en/s5/local/librispeech_lm_decode.sh b/egs/multi_en/s5/local/librispeech_lm_decode.sh
index 7e79c788636..563f870b721 100755
--- a/egs/multi_en/s5/local/librispeech_lm_decode.sh
+++ b/egs/multi_en/s5/local/librispeech_lm_decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# Apache 2.0
diff --git a/egs/multi_en/s5/local/make_partitions.sh b/egs/multi_en/s5/local/make_partitions.sh
index 74f23ae9746..167c5086368 100755
--- a/egs/multi_en/s5/local/make_partitions.sh
+++ b/egs/multi_en/s5/local/make_partitions.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# 2017 Xiaohui Zhang
diff --git a/egs/multi_en/s5/local/nnet3/run_ivector_common.sh b/egs/multi_en/s5/local/nnet3/run_ivector_common.sh
index d36cb0e6083..1060d101aec 100755
--- a/egs/multi_en/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/multi_en/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
diff --git a/egs/multi_en/s5/local/nnet3/run_tdnn.sh b/egs/multi_en/s5/local/nnet3/run_tdnn.sh
index 266c4af52d7..b8441c99009 100755
--- a/egs/multi_en/s5/local/nnet3/run_tdnn.sh
+++ b/egs/multi_en/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/nnet3/run_tdnn.sh
diff --git a/egs/multi_en/s5/local/prepare_dict.sh b/egs/multi_en/s5/local/prepare_dict.sh
index 8bf54a3dddc..8501b14de10 100755
--- a/egs/multi_en/s5/local/prepare_dict.sh
+++ b/egs/multi_en/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Intellisist, Inc. (Author: Navneeth K)
# 2017 Xiaohui Zhang
diff --git a/egs/multi_en/s5/local/rt03_data_prep.sh b/egs/multi_en/s5/local/rt03_data_prep.sh
index aa1e2ba4cc2..ea252b7753a 100755
--- a/egs/multi_en/s5/local/rt03_data_prep.sh
+++ b/egs/multi_en/s5/local/rt03_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/rt03_data_prep.sh
diff --git a/egs/multi_en/s5/local/score.sh b/egs/multi_en/s5/local/score.sh
index cada400acda..9bc36b3197c 100755
--- a/egs/multi_en/s5/local/score.sh
+++ b/egs/multi_en/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/score.sh
diff --git a/egs/multi_en/s5/local/score_sclite.sh b/egs/multi_en/s5/local/score_sclite.sh
index 07dd63950d5..2a10ba2d1cf 100755
--- a/egs/multi_en/s5/local/score_sclite.sh
+++ b/egs/multi_en/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/score_sclite.sh
diff --git a/egs/multi_en/s5/local/swbd1_data_download.sh b/egs/multi_en/s5/local/swbd1_data_download.sh
index 0c28e480a60..aa85cd7176a 100755
--- a/egs/multi_en/s5/local/swbd1_data_download.sh
+++ b/egs/multi_en/s5/local/swbd1_data_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/swbd1_data_download.sh
@@ -44,7 +44,7 @@ if [ ! -d $SWBD_DIR/transcriptions/swb_ms98_transcriptions ]; then
if [ ! -d swb_ms98_transcriptions ]; then
echo " *** Downloading trascriptions and dictionary ***"
wget http://www.openslr.org/resources/5/switchboard_word_alignments.tar.gz ||
- wget http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz
+ wget -c http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz
tar -xf switchboard_word_alignments.tar.gz
fi
)
diff --git a/egs/multi_en/s5/local/swbd1_data_prep.sh b/egs/multi_en/s5/local/swbd1_data_prep.sh
index 4c1b6c7a9e6..551e870ae62 100755
--- a/egs/multi_en/s5/local/swbd1_data_prep.sh
+++ b/egs/multi_en/s5/local/swbd1_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/swbd1_data_prep.sh
diff --git a/egs/multi_en/s5/local/swbd1_prepare_dict.sh b/egs/multi_en/s5/local/swbd1_prepare_dict.sh
index 78e208f720d..87292bef94d 100755
--- a/egs/multi_en/s5/local/swbd1_prepare_dict.sh
+++ b/egs/multi_en/s5/local/swbd1_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/swbd/s5c/local/swbd1_prepare_dict.sh
diff --git a/egs/multi_en/s5/local/tedlium_lm_decode.sh b/egs/multi_en/s5/local/tedlium_lm_decode.sh
index 1df850648f8..e9755f47a1e 100755
--- a/egs/multi_en/s5/local/tedlium_lm_decode.sh
+++ b/egs/multi_en/s5/local/tedlium_lm_decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# Apache 2.0
diff --git a/egs/multi_en/s5/local/tedlium_prepare_data.sh b/egs/multi_en/s5/local/tedlium_prepare_data.sh
index 22f79b0b117..2f240c953ff 100755
--- a/egs/multi_en/s5/local/tedlium_prepare_data.sh
+++ b/egs/multi_en/s5/local/tedlium_prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/tedlium/s5_r2/local/prepare_data.sh
diff --git a/egs/multi_en/s5/local/train_lms.sh b/egs/multi_en/s5/local/train_lms.sh
index 02fd66e0368..302ff75c8ff 100755
--- a/egs/multi_en/s5/local/train_lms.sh
+++ b/egs/multi_en/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/fisher_train_lms.sh
diff --git a/egs/multi_en/s5/local/wsj_data_prep.sh b/egs/multi_en/s5/local/wsj_data_prep.sh
index cc11f179eca..e42b9f09cd3 100755
--- a/egs/multi_en/s5/local/wsj_data_prep.sh
+++ b/egs/multi_en/s5/local/wsj_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/wsj/s5/local/wsj_data_prep.sh
diff --git a/egs/multi_en/s5/local/wsj_format_data.sh b/egs/multi_en/s5/local/wsj_format_data.sh
index 00ef0f49fd5..a54ad9c8d78 100755
--- a/egs/multi_en/s5/local/wsj_format_data.sh
+++ b/egs/multi_en/s5/local/wsj_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/wsj/s5/local/wsj_format_data.sh
diff --git a/egs/multi_en/s5/run.sh b/egs/multi_en/s5/run.sh
index 034ffeb4e66..229c0939d5c 100755
--- a/egs/multi_en/s5/run.sh
+++ b/egs/multi_en/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# 2017 Xiaohui Zhang
diff --git a/egs/ptb/s5/local/rnnlm/download_ptb.sh b/egs/ptb/s5/local/rnnlm/download_ptb.sh
index 858e152bff9..129c90e10b0 100755
--- a/egs/ptb/s5/local/rnnlm/download_ptb.sh
+++ b/egs/ptb/s5/local/rnnlm/download_ptb.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. path.sh
data_dir=data/ptb
diff --git a/egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh b/egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh
index 3e3b6087495..be3b6cc6848 100755
--- a/egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh
+++ b/egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from the directory egs/ptb/s5.
diff --git a/egs/ptb/s5/local/rnnlm/train_backoff_lm.sh b/egs/ptb/s5/local/rnnlm/train_backoff_lm.sh
index 17bdab92878..1281693db10 100644
--- a/egs/ptb/s5/local/rnnlm/train_backoff_lm.sh
+++ b/egs/ptb/s5/local/rnnlm/train_backoff_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is to train a small, pruned n-gram backoff LM to be used for sampling
# purposes during RNNLM training. We ue pocolm for this because it's good at pruning,
diff --git a/egs/ptb/s5/local/rnnlm/train_backoff_lm2.sh b/egs/ptb/s5/local/rnnlm/train_backoff_lm2.sh
index 98eb1b64e21..28b53fef079 100644
--- a/egs/ptb/s5/local/rnnlm/train_backoff_lm2.sh
+++ b/egs/ptb/s5/local/rnnlm/train_backoff_lm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is to train a small, pruned n-gram backoff LM to be used for sampling
# purposes during RNNLM training. It uses the C++ tool that we wrote for this
diff --git a/egs/ptb/s5/local/rnnlm/tuning/run_tdnn_a.sh b/egs/ptb/s5/local/rnnlm/tuning/run_tdnn_a.sh
index f0cac23231e..94065ace401 100755
--- a/egs/ptb/s5/local/rnnlm/tuning/run_tdnn_a.sh
+++ b/egs/ptb/s5/local/rnnlm/tuning/run_tdnn_a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from the directory egs/ptb/s5.
# This is to be done after local/prepare_rnnlm_data.sh.
diff --git a/egs/ptb/s5/run.sh b/egs/ptb/s5/run.sh
index d62be6871f3..2dcd8101860 100755
--- a/egs/ptb/s5/run.sh
+++ b/egs/ptb/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/reverb/s5/local/chain/compare_wer.sh b/egs/reverb/s5/local/chain/compare_wer.sh
index cd6be14ed88..736a3177f17 100755
--- a/egs/reverb/s5/local/chain/compare_wer.sh
+++ b/egs/reverb/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh
index c8b4997161e..a6767db3652 100755
--- a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index 4723400c76b..294d3d505e1 100755
--- a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/reverb/s5/local/compute_se_scores.sh b/egs/reverb/s5/local/compute_se_scores.sh
index 8168c2c46a2..4c6b8c2da0e 100755
--- a/egs/reverb/s5/local/compute_se_scores.sh
+++ b/egs/reverb/s5/local/compute_se_scores.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/reverb/s5/local/download_se_eval_tool.sh b/egs/reverb/s5/local/download_se_eval_tool.sh
index 0d7bb8305ea..aa29766b927 100755
--- a/egs/reverb/s5/local/download_se_eval_tool.sh
+++ b/egs/reverb/s5/local/download_se_eval_tool.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# This script downloads the official REVERB challenge SE scripts and SRMR toolbox
# This script also downloads and compiles PESQ
diff --git a/egs/reverb/s5/local/generate_data.sh b/egs/reverb/s5/local/generate_data.sh
index 3228f0e1b3c..91c9c96a260 100755
--- a/egs/reverb/s5/local/generate_data.sh
+++ b/egs/reverb/s5/local/generate_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe)
# Apache 2.0
diff --git a/egs/reverb/s5/local/get_results.sh b/egs/reverb/s5/local/get_results.sh
index 8867961dcdd..5945109708a 100755
--- a/egs/reverb/s5/local/get_results.sh
+++ b/egs/reverb/s5/local/get_results.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# "Our baselines"
echo "########################################"
diff --git a/egs/reverb/s5/local/nnet3/compare_wer.sh b/egs/reverb/s5/local/nnet3/compare_wer.sh
index 095e85cc338..4888de1f159 100755
--- a/egs/reverb/s5/local/nnet3/compare_wer.sh
+++ b/egs/reverb/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/reverb/s5/local/nnet3/run_ivector_common.sh b/egs/reverb/s5/local/nnet3/run_ivector_common.sh
index 3af3ad77565..4963ce3cf7c 100755
--- a/egs/reverb/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/reverb/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/reverb/s5/local/prepare_real_data.sh b/egs/reverb/s5/local/prepare_real_data.sh
index 2da51b9786b..5cf3ec56b26 100755
--- a/egs/reverb/s5/local/prepare_real_data.sh
+++ b/egs/reverb/s5/local/prepare_real_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe)
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh
index 8757021ddd7..a229ffd5115 100755
--- a/egs/reverb/s5/local/prepare_simu_data.sh
+++ b/egs/reverb/s5/local/prepare_simu_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe)
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
diff --git a/egs/reverb/s5/local/run_beamform.sh b/egs/reverb/s5/local/run_beamform.sh
index 1c8aade7287..ab62b7a6fdc 100755
--- a/egs/reverb/s5/local/run_beamform.sh
+++ b/egs/reverb/s5/local/run_beamform.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
# Copyright 2018, Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
diff --git a/egs/reverb/s5/local/run_wpe.sh b/egs/reverb/s5/local/run_wpe.sh
index d1ea56c6c55..5ed4a5b9832 100755
--- a/egs/reverb/s5/local/run_wpe.sh
+++ b/egs/reverb/s5/local/run_wpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/reverb/s5/local/score.sh b/egs/reverb/s5/local/score.sh
index 66bc976333f..87867679fd6 100755
--- a/egs/reverb/s5/local/score.sh
+++ b/egs/reverb/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
diff --git a/egs/reverb/s5/local/wsj_prepare_beep_dict.sh b/egs/reverb/s5/local/wsj_prepare_beep_dict.sh
index 879ef956844..73e434373c8 100755
--- a/egs/reverb/s5/local/wsj_prepare_beep_dict.sh
+++ b/egs/reverb/s5/local/wsj_prepare_beep_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 MERL (author: Felix Weninger)
# Contains some code by Microsoft Corporation, Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/reverb/s5/local/wsjcam0_data_prep.sh b/egs/reverb/s5/local/wsjcam0_data_prep.sh
index cf87aa355d4..65fe9c7f3af 100755
--- a/egs/reverb/s5/local/wsjcam0_data_prep.sh
+++ b/egs/reverb/s5/local/wsjcam0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 MERL (author: Felix Weninger)
# Contains some code by Microsoft Corporation, Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/reverb/s5/local/wsjcam0_format_data.sh b/egs/reverb/s5/local/wsjcam0_format_data.sh
index 883cb20ed0e..26b5172f338 100755
--- a/egs/reverb/s5/local/wsjcam0_format_data.sh
+++ b/egs/reverb/s5/local/wsjcam0_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 MERL (author: Felix Weninger)
# Contains some code by Microsoft Corporation, Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh
index 999ec98e637..a7e3dd75167 100755
--- a/egs/reverb/s5/run.sh
+++ b/egs/reverb/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 MERL (author: Felix Weninger and Shinji Watanabe)
# Johns Hopkins University (author: Szu-Jui Chen)
diff --git a/egs/rimes/v1/local/chain/compare_wer.sh b/egs/rimes/v1/local/chain/compare_wer.sh
index 4a2cc29481c..ae575b29d4f 100755
--- a/egs/rimes/v1/local/chain/compare_wer.sh
+++ b/egs/rimes/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
index 33eb9dcb98c..b8be489e7d1 100755
--- a/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1a is a 6 cnn layer 3 tdnn layer model with dropout, l2-regularization, batch-normalization
diff --git a/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index 9d28a41316d..da6a0bf5cb2 100755
--- a/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/rimes/v1/local/extract_features.sh b/egs/rimes/v1/local/extract_features.sh
index ec3bc8a268c..e4adf8bf85d 100755
--- a/egs/rimes/v1/local/extract_features.sh
+++ b/egs/rimes/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/rimes/v1/local/prepare_data.sh b/egs/rimes/v1/local/prepare_data.sh
index 502718e7777..232ecf7c9ef 100755
--- a/egs/rimes/v1/local/prepare_data.sh
+++ b/egs/rimes/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script creates traing and validations splits, downloads text corpus for language modeling,
# prepares the training, validation and test data for rimes dataset
diff --git a/egs/rimes/v1/local/score.sh b/egs/rimes/v1/local/score.sh
index 0cfbda9b556..bb325ab793c 100755
--- a/egs/rimes/v1/local/score.sh
+++ b/egs/rimes/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
cmd=run.pl
diff --git a/egs/rimes/v1/local/score_paragraph.sh b/egs/rimes/v1/local/score_paragraph.sh
index c6ef4da1d5b..810e9a6ccc7 100755
--- a/egs/rimes/v1/local/score_paragraph.sh
+++ b/egs/rimes/v1/local/score_paragraph.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
min_lmwt=7
max_lmwt=17
diff --git a/egs/rimes/v1/local/train_lm.sh b/egs/rimes/v1/local/train_lm.sh
index 51927b7a97e..29579cbca23 100755
--- a/egs/rimes/v1/local/train_lm.sh
+++ b/egs/rimes/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/rimes/v1/run_end2end.sh b/egs/rimes/v1/run_end2end.sh
index d3e3da2be13..a56d54f9727 100755
--- a/egs/rimes/v1/run_end2end.sh
+++ b/egs/rimes/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# Ashish Arora
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh
index c393a9aa28b..035c607ecc1 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is modified from run_tdnn_5f.sh, to use the old topology, as a baseline
# to test the modified transition-model code (by which we hope to be able to
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh
index 131bcf98de9..f608c13260b 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is a modified version of run_tdnn_5g.sh. It uses
# the new transition model and the python version of training scripts.
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh
index db5944fdbea..32b50cde7f4 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is a modified version of run_tdnn_5n.sh. It uses
# a new configs convention for chain model after kaldi 5.2.
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5p.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5p.sh
index 37073a53eba..67f67fbdfd8 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_5p.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is a modified version of run_tdnn_5o.sh. It uses online-cmn
# for input features, both for ivector extractor and the chain model.
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh
index f77ebb2a071..c457a75d6fc 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script uses weight transfer as a transfer learning method to transfer
# already trained neural net model on wsj to rm.
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh
index e38fa0b231c..8f15a1ed1d9 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# _1b is as _1a, but different as follows
# 1) It uses wsj phone set phones.txt and new lexicon generated using word pronunciation
# in swj lexincon.txt. rm words, that are not presented in wsj, are added as oov
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh
index 04bef13fab0..4d1cb76bdc1 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# _1c is as _1b but it uses source chain-trained DNN model instead of GMM model
# to generate alignments for RM using WSJ model.
diff --git a/egs/rm/s5/local/nnet/run_autoencoder.sh b/egs/rm/s5/local/nnet/run_autoencoder.sh
index d9a309deee0..a0a0be33237 100755
--- a/egs/rm/s5/local/nnet/run_autoencoder.sh
+++ b/egs/rm/s5/local/nnet/run_autoencoder.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_blocksoftmax.sh b/egs/rm/s5/local/nnet/run_blocksoftmax.sh
index 175a6021778..81a5ecabafa 100755
--- a/egs/rm/s5/local/nnet/run_blocksoftmax.sh
+++ b/egs/rm/s5/local/nnet/run_blocksoftmax.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_blstm.sh b/egs/rm/s5/local/nnet/run_blstm.sh
index ce0baecb5c6..b2fd495f851 100755
--- a/egs/rm/s5/local/nnet/run_blstm.sh
+++ b/egs/rm/s5/local/nnet/run_blstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_cnn.sh b/egs/rm/s5/local/nnet/run_cnn.sh
index 8c5730a1c85..b57d6e47111 100755
--- a/egs/rm/s5/local/nnet/run_cnn.sh
+++ b/egs/rm/s5/local/nnet/run_cnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_cnn2d.sh b/egs/rm/s5/local/nnet/run_cnn2d.sh
index be17bce7a57..e493cf44497 100755
--- a/egs/rm/s5/local/nnet/run_cnn2d.sh
+++ b/egs/rm/s5/local/nnet/run_cnn2d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_dnn.sh b/egs/rm/s5/local/nnet/run_dnn.sh
index c2ba26970ad..9059cff43c2 100755
--- a/egs/rm/s5/local/nnet/run_dnn.sh
+++ b/egs/rm/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_dnn_fbank.sh b/egs/rm/s5/local/nnet/run_dnn_fbank.sh
index ff6916346c8..bfc0bdd3ed4 100755
--- a/egs/rm/s5/local/nnet/run_dnn_fbank.sh
+++ b/egs/rm/s5/local/nnet/run_dnn_fbank.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_dnn_fbank_relu.sh b/egs/rm/s5/local/nnet/run_dnn_fbank_relu.sh
index 11b1547051d..b3b544ad3f0 100755
--- a/egs/rm/s5/local/nnet/run_dnn_fbank_relu.sh
+++ b/egs/rm/s5/local/nnet/run_dnn_fbank_relu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_dummy_ivec.sh b/egs/rm/s5/local/nnet/run_dummy_ivec.sh
index 956d22d2e54..f92e4460af8 100755
--- a/egs/rm/s5/local/nnet/run_dummy_ivec.sh
+++ b/egs/rm/s5/local/nnet/run_dummy_ivec.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_lstm.sh b/egs/rm/s5/local/nnet/run_lstm.sh
index 48e8592fd7b..6985443b86b 100755
--- a/egs/rm/s5/local/nnet/run_lstm.sh
+++ b/egs/rm/s5/local/nnet/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_multilingual.sh b/egs/rm/s5/local/nnet/run_multilingual.sh
index 126f616c34d..74c722cceb9 100755
--- a/egs/rm/s5/local/nnet/run_multilingual.sh
+++ b/egs/rm/s5/local/nnet/run_multilingual.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Illinois (Author: Amit Das)
# Copyright 2012-2015 Brno University of Technology (Author: Karel Vesely)
diff --git a/egs/rm/s5/local/nnet2/run_4a.sh b/egs/rm/s5/local/nnet2/run_4a.sh
index 42695abdccb..81b3fe08b4d 100755
--- a/egs/rm/s5/local/nnet2/run_4a.sh
+++ b/egs/rm/s5/local/nnet2/run_4a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/nnet2/run_4b.sh b/egs/rm/s5/local/nnet2/run_4b.sh
index 741340412c0..a160799a804 100755
--- a/egs/rm/s5/local/nnet2/run_4b.sh
+++ b/egs/rm/s5/local/nnet2/run_4b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/rm/s5/local/nnet2/run_4b_gpu.sh b/egs/rm/s5/local/nnet2/run_4b_gpu.sh
index 9cde9f1694e..904a78e91a8 100755
--- a/egs/rm/s5/local/nnet2/run_4b_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_4b_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/rm/s5/local/nnet2/run_4c.sh b/egs/rm/s5/local/nnet2/run_4c.sh
index 7a2bd0360d7..28a7bb27e33 100755
--- a/egs/rm/s5/local/nnet2/run_4c.sh
+++ b/egs/rm/s5/local/nnet2/run_4c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is neural net training on top of adapted 40-dimensional features.
diff --git a/egs/rm/s5/local/nnet2/run_4d.sh b/egs/rm/s5/local/nnet2/run_4d.sh
index e7765fb28d5..425f82dc51d 100755
--- a/egs/rm/s5/local/nnet2/run_4d.sh
+++ b/egs/rm/s5/local/nnet2/run_4d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# local/nnet2/run_4d.sh is the new, faster version of the p-norm training script.
diff --git a/egs/rm/s5/local/nnet2/run_4d2.sh b/egs/rm/s5/local/nnet2/run_4d2.sh
index 5f64b3aef2e..e9d60d134a0 100755
--- a/egs/rm/s5/local/nnet2/run_4d2.sh
+++ b/egs/rm/s5/local/nnet2/run_4d2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 4d2 is as 4d but adding perturbed training with multiplier=1.0
diff --git a/egs/rm/s5/local/nnet2/run_4d3.sh b/egs/rm/s5/local/nnet2/run_4d3.sh
index 3e486acabb4..d204092a7d1 100755
--- a/egs/rm/s5/local/nnet2/run_4d3.sh
+++ b/egs/rm/s5/local/nnet2/run_4d3.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_4d3.sh is as run_4d.sh, but using a newer version of the scripts that
diff --git a/egs/rm/s5/local/nnet2/run_4e.sh b/egs/rm/s5/local/nnet2/run_4e.sh
index 425af853bf1..85ba114adda 100755
--- a/egs/rm/s5/local/nnet2/run_4e.sh
+++ b/egs/rm/s5/local/nnet2/run_4e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/nnet2/run_4e_gpu.sh b/egs/rm/s5/local/nnet2/run_4e_gpu.sh
index 9fe72669802..df471cca273 100755
--- a/egs/rm/s5/local/nnet2/run_4e_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_4e_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is GPU based pnorm neural net ensemble training on top of adapted 40-dimensional features.
diff --git a/egs/rm/s5/local/nnet2/run_5c.sh b/egs/rm/s5/local/nnet2/run_5c.sh
index edcf366f0fd..77beac9c4cd 100755
--- a/egs/rm/s5/local/nnet2/run_5c.sh
+++ b/egs/rm/s5/local/nnet2/run_5c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is neural net training on top of adapted 40-dimensional features.
# This version of the script uses GPUs. We distinguish it by putting "_gpu"
diff --git a/egs/rm/s5/local/nnet2/run_5c_gpu.sh b/egs/rm/s5/local/nnet2/run_5c_gpu.sh
index 219e2cb808e..edabae780f8 100755
--- a/egs/rm/s5/local/nnet2/run_5c_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_5c_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates discriminative training of neural nets.
diff --git a/egs/rm/s5/local/nnet2/run_5d.sh b/egs/rm/s5/local/nnet2/run_5d.sh
index 3617ea0b126..8e3321435ec 100755
--- a/egs/rm/s5/local/nnet2/run_5d.sh
+++ b/egs/rm/s5/local/nnet2/run_5d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates discriminative training of p-norm neural nets.
diff --git a/egs/rm/s5/local/nnet2/run_5d_gpu.sh b/egs/rm/s5/local/nnet2/run_5d_gpu.sh
index f83cd3db20a..e665ca7e3f1 100755
--- a/egs/rm/s5/local/nnet2/run_5d_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_5d_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates discriminative training of p-norm neural nets.
diff --git a/egs/rm/s5/local/nnet2/run_5e_gpu.sh b/egs/rm/s5/local/nnet2/run_5e_gpu.sh
index 37c9fb4238d..9d0f43f9279 100755
--- a/egs/rm/s5/local/nnet2/run_5e_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_5e_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates discriminative training of ensemble-trained p-norm neural nets.
diff --git a/egs/rm/s5/local/online/run_gmm.sh b/egs/rm/s5/local/online/run_gmm.sh
index 90a2a48437d..b9bf04dfd56 100755
--- a/egs/rm/s5/local/online/run_gmm.sh
+++ b/egs/rm/s5/local/online/run_gmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_gmm_pitch.sh b/egs/rm/s5/local/online/run_gmm_pitch.sh
index 65388fda3c2..ad87935d281 100755
--- a/egs/rm/s5/local/online/run_gmm_pitch.sh
+++ b/egs/rm/s5/local/online/run_gmm_pitch.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2.sh b/egs/rm/s5/local/online/run_nnet2.sh
index 243be25764e..035ab75678f 100755
--- a/egs/rm/s5/local/online/run_nnet2.sh
+++ b/egs/rm/s5/local/online/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_baseline.sh b/egs/rm/s5/local/online/run_nnet2_baseline.sh
index de977b29f43..cc4708b6a33 100755
--- a/egs/rm/s5/local/online/run_nnet2_baseline.sh
+++ b/egs/rm/s5/local/online/run_nnet2_baseline.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is a baseline for ./run_nnet2.sh, without
diff --git a/egs/rm/s5/local/online/run_nnet2_common.sh b/egs/rm/s5/local/online/run_nnet2_common.sh
index dfacfcf9c9f..6a721f93e1a 100755
--- a/egs/rm/s5/local/online/run_nnet2_common.sh
+++ b/egs/rm/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script extracts mfcc features using mfcc_config and trains ubm model and
# ivector extractor and extracts ivector for train and test.
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_multisplice.sh b/egs/rm/s5/local/online/run_nnet2_multisplice.sh
index a05d6856a0e..e8bb0762710 100755
--- a/egs/rm/s5/local/online/run_nnet2_multisplice.sh
+++ b/egs/rm/s5/local/online/run_nnet2_multisplice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_multisplice_disc.sh b/egs/rm/s5/local/online/run_nnet2_multisplice_disc.sh
index c1d424ab58b..044e32c0891 100755
--- a/egs/rm/s5/local/online/run_nnet2_multisplice_disc.sh
+++ b/egs/rm/s5/local/online/run_nnet2_multisplice_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is to be run after run_nnet2_multisplice.sh.
# It demonstrates discriminative training for the online-nnet2 models
diff --git a/egs/rm/s5/local/online/run_nnet2_perturbed.sh b/egs/rm/s5/local/online/run_nnet2_perturbed.sh
index 5583ad745ea..cede0620b5a 100755
--- a/egs/rm/s5/local/online/run_nnet2_perturbed.sh
+++ b/egs/rm/s5/local/online/run_nnet2_perturbed.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_wsj.sh b/egs/rm/s5/local/online/run_nnet2_wsj.sh
index 5a5b293f790..79bb22866e1 100755
--- a/egs/rm/s5/local/online/run_nnet2_wsj.sh
+++ b/egs/rm/s5/local/online/run_nnet2_wsj.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# note: see the newer, better script run_nnet2_wsj_joint.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_wsj_joint.sh b/egs/rm/s5/local/online/run_nnet2_wsj_joint.sh
index 68a25f49b3a..6e108065a88 100755
--- a/egs/rm/s5/local/online/run_nnet2_wsj_joint.sh
+++ b/egs/rm/s5/local/online/run_nnet2_wsj_joint.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the latest version of training that combines RM and WSJ, in a setup where
# there are no shared phones (so it's like a multilingual setup).
diff --git a/egs/rm/s5/local/online/run_nnet2_wsj_joint_disc.sh b/egs/rm/s5/local/online/run_nnet2_wsj_joint_disc.sh
index c7d31427e8f..87808e4c4a0 100755
--- a/egs/rm/s5/local/online/run_nnet2_wsj_joint_disc.sh
+++ b/egs/rm/s5/local/online/run_nnet2_wsj_joint_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is discriminative training after multi-language training (as
diff --git a/egs/rm/s5/local/prepare_wsj_rm_lang.sh b/egs/rm/s5/local/prepare_wsj_rm_lang.sh
index fd8cb958925..9be949e887b 100755
--- a/egs/rm/s5/local/prepare_wsj_rm_lang.sh
+++ b/egs/rm/s5/local/prepare_wsj_rm_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Pegah Ghahremani
# This script prepares a dictionary for wsj-to-rm transfer learning experiment,
diff --git a/egs/rm/s5/local/rm_data_prep.sh b/egs/rm/s5/local/rm_data_prep.sh
index 9fe759aef4a..4c7fa015f93 100755
--- a/egs/rm/s5/local/rm_data_prep.sh
+++ b/egs/rm/s5/local/rm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
diff --git a/egs/rm/s5/local/rm_prepare_grammar.sh b/egs/rm/s5/local/rm_prepare_grammar.sh
index 20c31b7d208..4255e48eb77 100755
--- a/egs/rm/s5/local/rm_prepare_grammar.sh
+++ b/egs/rm/s5/local/rm_prepare_grammar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
diff --git a/egs/rm/s5/local/rm_prepare_grammar_ug.sh b/egs/rm/s5/local/rm_prepare_grammar_ug.sh
index 427635caadc..e42efd22aa5 100755
--- a/egs/rm/s5/local/rm_prepare_grammar_ug.sh
+++ b/egs/rm/s5/local/rm_prepare_grammar_ug.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
diff --git a/egs/rm/s5/local/run_dnn_convert_nnet2.sh b/egs/rm/s5/local/run_dnn_convert_nnet2.sh
index 664ecf3f80b..72c3b83bbc2 100755
--- a/egs/rm/s5/local/run_dnn_convert_nnet2.sh
+++ b/egs/rm/s5/local/run_dnn_convert_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates some commands that you could run after run_dnn.sh,
# that relate to conversion to the nnet2 model format.
diff --git a/egs/rm/s5/local/run_nnet2.sh b/egs/rm/s5/local/run_nnet2.sh
index 1d874324856..3605b6dcbac 100755
--- a/egs/rm/s5/local/run_nnet2.sh
+++ b/egs/rm/s5/local/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# You don't have to run all these.
# you can pick and choose. Look at the RESULTS file..
diff --git a/egs/rm/s5/local/run_pitch.sh b/egs/rm/s5/local/run_pitch.sh
index d123fc8901b..cab78b28305 100755
--- a/egs/rm/s5/local/run_pitch.sh
+++ b/egs/rm/s5/local/run_pitch.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is like ../run.sh but with pitch; it's included to demonstrate the
# online-decoding with pitch.
diff --git a/egs/rm/s5/local/run_raw_fmllr.sh b/egs/rm/s5/local/run_raw_fmllr.sh
index e02002aa1d0..2889cf1f5c5 100755
--- a/egs/rm/s5/local/run_raw_fmllr.sh
+++ b/egs/rm/s5/local/run_raw_fmllr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/run_sgmm2.sh b/egs/rm/s5/local/run_sgmm2.sh
index 95a40141892..808a52dc95c 100755
--- a/egs/rm/s5/local/run_sgmm2.sh
+++ b/egs/rm/s5/local/run_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is as run_sgmm.sh but using the "sgmm2" code, which uses "state-clustered tied mixtures"
# and the symmetric SGMM, and one or two other small changes (e.g. no updating of M for a few
diff --git a/egs/rm/s5/local/run_sgmm2x.sh b/egs/rm/s5/local/run_sgmm2x.sh
index 00730697693..1c76f57754e 100755
--- a/egs/rm/s5/local/run_sgmm2x.sh
+++ b/egs/rm/s5/local/run_sgmm2x.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is as run_sgmm2.sh but excluding the "speaker-dependent weights",
# so not doing the symmetric SGMM.
diff --git a/egs/rm/s5/local/run_sgmm_multiling.sh b/egs/rm/s5/local/run_sgmm_multiling.sh
index 42369cd2937..a3e138a64eb 100755
--- a/egs/rm/s5/local/run_sgmm_multiling.sh
+++ b/egs/rm/s5/local/run_sgmm_multiling.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Multilingual setup for SGMMs.
# Caution: this is just a stub, intended to show some others what to do, it
diff --git a/egs/rm/s5/local/run_vtln.sh b/egs/rm/s5/local/run_vtln.sh
index 793829653d2..032884d50a9 100755
--- a/egs/rm/s5/local/run_vtln.sh
+++ b/egs/rm/s5/local/run_vtln.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This scripts tests the VTLN estimation where the system used to get the
# VTLN warps is based on delta+delta-deltas.
diff --git a/egs/rm/s5/local/run_vtln2.sh b/egs/rm/s5/local/run_vtln2.sh
index b87030d2e3d..5f02aeb1d73 100755
--- a/egs/rm/s5/local/run_vtln2.sh
+++ b/egs/rm/s5/local/run_vtln2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
featdir=mfcc
diff --git a/egs/rm/s5/local/test_decoders.sh b/egs/rm/s5/local/test_decoders.sh
index 2b1d4172139..d4080351828 100755
--- a/egs/rm/s5/local/test_decoders.sh
+++ b/egs/rm/s5/local/test_decoders.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
dir=exp/tri1/decode/tmp
diff --git a/egs/rm/s5/run.sh b/egs/rm/s5/run.sh
index 61dcaa0e34a..2a8e5add17c 100755
--- a/egs/rm/s5/run.sh
+++ b/egs/rm/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
set -e # exit on error
diff --git a/egs/sitw/v1/local/make_sitw.sh b/egs/sitw/v1/local/make_sitw.sh
index 7c0bcd0fea1..699b6f7cea2 100755
--- a/egs/sitw/v1/local/make_sitw.sh
+++ b/egs/sitw/v1/local/make_sitw.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyrigh 2017 Ignacio Viñals
# 2017-2018 David Snyder
#
diff --git a/egs/sitw/v1/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/sitw/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
index 480b2cc2fe8..ebf0a2cd21f 100755
--- a/egs/sitw/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
+++ b/egs/sitw/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copied from egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh (commit 3ea534070fd2cccd2e4ee21772132230033022ce).
#
diff --git a/egs/sitw/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/sitw/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
index 892c1ad55bd..54a226e1fc9 100755
--- a/egs/sitw/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
+++ b/egs/sitw/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 David Snyder
# 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/sitw/v1/run.sh b/egs/sitw/v1/run.sh
index 797451df263..592d9f213a0 100755
--- a/egs/sitw/v1/run.sh
+++ b/egs/sitw/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
# 2017-2018 David Snyder
diff --git a/egs/sitw/v2/run.sh b/egs/sitw/v2/run.sh
index aad58e4a853..7f382dc5fd5 100755
--- a/egs/sitw/v2/run.sh
+++ b/egs/sitw/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Daniel Povey)
# 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2018 Ewald Enzinger
diff --git a/egs/snips/README.txt b/egs/snips/README.txt
new file mode 100644
index 00000000000..6eb871140fa
--- /dev/null
+++ b/egs/snips/README.txt
@@ -0,0 +1,15 @@
+
+ The SNIPS dataset is a ~54-hour wake word corpus covering 3300 speakers.
+ The wake word is "Hey Snips" pronounced with no pause between the two words.
+ It contains a large variety of English accents and recording environments.
+ Negative samples have been recorded in the same conditions as wake-word utterances.
+ To download the dataset you need to follow the instructions on
+ https://github.com/snipsco/keyword-spotting-research-datasets. It is provided
+ by Snips, Paris, France (https://snips.ai)
+
+ The recipe is in v1/
+
+ The E2E LF-MMI recipe does not require any prior alignments for training
+ LF-MMI, making the alignment more flexible during training. It can be optionally
+ followed by a regular LF-MMI training to further improve the performance.
+
diff --git a/egs/snips/v1/cmd.sh b/egs/snips/v1/cmd.sh
new file mode 100644
index 00000000000..fc5d4aa9e1c
--- /dev/null
+++ b/egs/snips/v1/cmd.sh
@@ -0,0 +1,24 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl"
+export decode_cmd="queue.pl --mem 4G"
+# the use of cuda_cmd is deprecated, used only in 'nnet1',
+export cuda_cmd="queue.pl --gpu 1"
+
+if [[ "$(hostname -f)" == "*.fit.vutbr.cz" ]]; then
+ queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+ export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+ export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+ export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
+
diff --git a/egs/snips/v1/conf/mfcc.conf b/egs/snips/v1/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/snips/v1/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/snips/v1/conf/mfcc_hires.conf b/egs/snips/v1/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..d96b86ddfcb
--- /dev/null
+++ b/egs/snips/v1/conf/mfcc_hires.conf
@@ -0,0 +1,9 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/snips/v1/conf/online_cmvn.conf b/egs/snips/v1/conf/online_cmvn.conf
new file mode 100644
index 00000000000..a173510e433
--- /dev/null
+++ b/egs/snips/v1/conf/online_cmvn.conf
@@ -0,0 +1,3 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
+--norm-means=true
+--norm-vars=false
diff --git a/egs/snips/v1/local/add_prefix_to_scp.py b/egs/snips/v1/local/add_prefix_to_scp.py
new file mode 120000
index 00000000000..b6750c78e16
--- /dev/null
+++ b/egs/snips/v1/local/add_prefix_to_scp.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/add_prefix_to_scp.py
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/build_tree.sh b/egs/snips/v1/local/chain/build_tree.sh
new file mode 120000
index 00000000000..fb4d74cc9ae
--- /dev/null
+++ b/egs/snips/v1/local/chain/build_tree.sh
@@ -0,0 +1 @@
+../../../../mobvoi/v1/local/chain/build_tree.sh
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/run_e2e_tdnn.sh b/egs/snips/v1/local/chain/run_e2e_tdnn.sh
new file mode 120000
index 00000000000..891eec02423
--- /dev/null
+++ b/egs/snips/v1/local/chain/run_e2e_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_e2e_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/run_tdnn.sh b/egs/snips/v1/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/snips/v1/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/run_tdnn_e2eali.sh b/egs/snips/v1/local/chain/run_tdnn_e2eali.sh
new file mode 120000
index 00000000000..38f0bd07e6c
--- /dev/null
+++ b/egs/snips/v1/local/chain/run_tdnn_e2eali.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_e2eali_1a.sh
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/tuning/run_e2e_tdnn_1a.sh b/egs/snips/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
new file mode 100755
index 00000000000..4085f923d2e
--- /dev/null
+++ b/egs/snips/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
@@ -0,0 +1,239 @@
+#!/usr/bin/env bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+set -e
+
+# configs for 'chain'
+stage=0
+train_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=3
+num_jobs_initial=2
+num_jobs_final=5
+minibatch_size=150=128,64/300=100,64,32/600=50,32,16/1200=16,8
+common_egs_dir=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_combined_spe2e
+test_sets="dev eval"
+wake_word="HeySnips"
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ mkdir -p $tree_dir
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word=`cat data/lang/phones.txt | grep "heysnips" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat < $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word $id_word
+2 3 $id_sil $id_sil
+1 4 $id_freetext $id_freetext
+4 5 $id_sil $id_sil
+3 2.09
+5 0.0
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+ steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$train_cmd" \
+ data/${train_set}_hires $lang $tree_dir
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <