diff --git a/.github/ISSUE_TEMPLATE/kaldi10-issue.md b/.github/ISSUE_TEMPLATE/kaldi10-issue.md
new file mode 100644
index 00000000000..5f2d11d8a0a
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/kaldi10-issue.md
@@ -0,0 +1,9 @@
+---
+name: Kaldi10 issue
+about: This option is for use by core developers only
+title: ''
+labels: kaldi10-TODO
+assignees: ''
+
+---
+
diff --git a/.gitignore b/.gitignore
index 9f219d458a4..267fdc91f5b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -151,3 +151,10 @@ GSYMS
/tools/cub-1.8.0/
/tools/cub
/tools/python/
+/tools/ngram-1.3.7.tar.gz
+/tools/ngram-1.3.7/
+
+# These CMakeLists.txt files are all generated on the fly at the moment.
+# They are listed here to avoid accidental check-in.
+/src/**/CMakeLists.txt
+/build*
diff --git a/.travis.yml b/.travis.yml
index 51e49653efc..92959f16227 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,6 +26,7 @@ addons:
branches:
only:
- master
+ - pybind11
before_install:
- cat /proc/sys/kernel/core_pattern
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000000..ededc78b8a4
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,244 @@
+cmake_minimum_required(VERSION 3.5)
+project(kaldi)
+
+set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}")
+include(GNUInstallDirs)
+include(Utils)
+include(third_party/get_third_party)
+
+# Should update cmake to a more recent version which supports FindPython3.
+find_package(PythonInterp)
+if(NOT PYTHON_EXECUTABLE OR PYTHON_VERSION_MAJOR LESS 3)
+ message(WARNING "Needs python3 to auto-generate most CMake files, but not found. "
+ "Will try `python3` directly...")
+ set(PYTHON_EXECUTABLE "python3")
+endif()
+
+message(STATUS "Running gen_cmake_skeleton.py")
+execute_process(COMMAND ${PYTHON_EXECUTABLE}
+ "${CMAKE_CURRENT_SOURCE_DIR}/cmake/gen_cmake_skeleton.py"
+ "${CMAKE_CURRENT_SOURCE_DIR}/src"
+ "--quiet"
+)
+
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_INSTALL_MESSAGE LAZY) # hide "-- Up-to-date: ..."
+if(BUILD_SHARED_LIBS)
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+ if(WIN32)
+ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
+ message(FATAL_ERROR "DLL is not supported currently")
+ elseif(APPLE)
+ set(CMAKE_INSTALL_RPATH "@loader_path")
+ else()
+ set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../lib")
+ endif()
+endif()
+
+if(APPLE)
+ # Use built-in BLAS on MacOS by default.
+ set(MATHLIB "Accelerate" CACHE STRING "OpenBLAS|MKL|Accelerate")
+else()
+ set(MATHLIB "OpenBLAS" CACHE STRING "OpenBLAS|MKL|Accelerate")
+endif()
+option(KALDI_BUILD_EXE "If disabled, will make add_kaldi_executable a no-op" ON)
+option(KALDI_BUILD_TEST "If disabled, will make add_kaldi_test_executable a no-op" ON)
+option(KALDI_USE_PATCH_NUMBER "Use MAJOR.MINOR.PATCH format, otherwise MAJOR.MINOR" OFF)
+
+if (KALDI_BUILD_TEST)
+ include(CTest)
+ enable_testing()
+endif()
+
+link_libraries(${CMAKE_DL_LIBS})
+
+find_package(Threads)
+link_libraries(Threads::Threads)
+
+if(MATHLIB STREQUAL "OpenBLAS")
+ set(BLA_VENDOR "OpenBLAS")
+ find_package(LAPACK REQUIRED)
+ add_definitions(-DHAVE_CLAPACK=1)
+ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/tools/CLAPACK)
+ link_libraries(${BLAS_LIBRARIES} ${LAPACK_LIBRARIES})
+elseif(MATHLIB STREQUAL "MKL")
+ set(BLA_VENDOR "Intel10_64lp")
+ # find_package(BLAS REQUIRED)
+ normalize_env_path(ENV{MKLROOT})
+ find_package(LAPACK REQUIRED)
+ add_definitions(-DHAVE_MKL=1)
+  include_directories($ENV{MKLROOT}/include) # TODO: avoid relying on the environment; find_package does not propagate include dirs...
+ link_libraries(${BLAS_LIBRARIES} ${LAPACK_LIBRARIES})
+elseif(MATHLIB STREQUAL "Accelerate")
+ execute_process(COMMAND sw_vers -productVersion
+ OUTPUT_VARIABLE MACOS_VERSION)
+ if(MACOS_VERSION VERSION_LESS "10.12" AND MACOS_VERSION VERSION_GREATER_EQUAL "10.11")
+ message(WARNING
+ "**BAD WARNING**: You are using OS X El Capitan. Some versions of this OS"
+ " have a bug in the BLAS implementation that affects Kaldi."
+ " After compiling, cd to matrix/ and type 'make test'. The"
+ " test will fail if the problem exists in your version."
+ " Eventually this issue will be fixed by system updates from"
+ " Apple. Unexplained crashes with reports of NaNs will"
+ " be caused by this bug, but some recipes will (sometimes) work."
+ )
+ endif()
+ set(BLA_VENDOR "Apple")
+ find_package(BLAS REQUIRED)
+ find_package(LAPACK REQUIRED)
+ add_definitions(-DHAVE_CLAPACK=1)
+ link_libraries(${BLAS_LIBRARIES} ${LAPACK_LIBRARIES})
+else()
+ message(FATAL_ERROR "${MATHLIB} is not tested and supported, you are on your own now.")
+endif()
+
+if(MSVC)
+  # Added in source, but ideally this should be handled by the build script instead.
+ # add_definitions(-DWIN32_LEAN_AND_MEAN=1)
+
+ add_compile_options(/permissive- /FS /wd4819 /EHsc /bigobj)
+
+ # some warnings related with fst
+ add_compile_options(/wd4018 /wd4244 /wd4267 /wd4291 /wd4305)
+
+ set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
+ if(NOT DEFINED ENV{CUDAHOSTCXX})
+ set(ENV{CUDAHOSTCXX} ${CMAKE_CXX_COMPILER})
+ endif()
+ if(NOT DEFINED CUDA_HOST_COMPILER)
+ set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
+ endif()
+endif()
+
+find_package(CUDA)
+if(CUDA_FOUND)
+ set(CUB_ROOT_DIR "${PROJECT_SOURCE_DIR}/tools/cub")
+
+ set(CUDA_PROPAGATE_HOST_FLAGS ON)
+ set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread;-std=c++${CMAKE_CXX_STANDARD}")
+ if(MSVC)
+ list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler /permissive-,/FS,/wd4819,/EHsc,/bigobj")
+ list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler /wd4018,/wd4244,/wd4267,/wd4291,/wd4305")
+ if(BUILD_SHARED_LIBS)
+ list(APPEND CUDA_NVCC_FLAGS_RELEASE -Xcompiler /MD)
+ list(APPEND CUDA_NVCC_FLAGS_DEBUG -Xcompiler /MDd)
+ endif()
+ else()
+ # list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -std=c++${CMAKE_CXX_STANDARD}")
+ list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
+ endif()
+ set(CUDA_NVCC_FLAGS ${KALDI_CUDA_NVCC_FLAGS} ${CUDA_NVCC_FLAGS})
+
+ add_definitions(-DHAVE_CUDA=1)
+ add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM=1)
+ include_directories(${CUDA_INCLUDE_DIRS})
+ link_libraries(
+ ${CUDA_LIBRARIES}
+ ${CUDA_CUDA_LIBRARY}
+ ${CUDA_CUBLAS_LIBRARIES}
+ ${CUDA_CUFFT_LIBRARIES}
+ ${CUDA_curand_LIBRARY}
+ ${CUDA_cusolver_LIBRARY}
+ ${CUDA_cusparse_LIBRARY})
+
+ find_package(NvToolExt REQUIRED)
+ include_directories(${NvToolExt_INCLUDE_DIR})
+ link_libraries(${NvToolExt_LIBRARIES})
+
+ find_package(CUB REQUIRED)
+ include_directories(${CUB_INCLUDE_DIR})
+endif()
+
+add_definitions(-DKALDI_NO_PORTAUDIO=1)
+
+include(VersionHelper)
+get_version() # this will set KALDI_VERSION and KALDI_PATCH_NUMBER
+if(${KALDI_USE_PATCH_NUMBER})
+ set(KALDI_VERSION "${KALDI_VERSION}.${KALDI_PATCH_NUMBER}")
+endif()
+
+get_third_party(openfst)
+set(OPENFST_ROOT_DIR ${CMAKE_BINARY_DIR}/openfst)
+include(third_party/openfst_lib_target)
+link_libraries(fst)
+
+# add all native libraries
+add_subdirectory(src/base) # NOTE, we need to patch the target with version from outside
+set_property(TARGET kaldi-base PROPERTY COMPILE_DEFINITIONS "KALDI_VERSION=\"${KALDI_VERSION}\"")
+add_subdirectory(src/matrix)
+add_subdirectory(src/cudamatrix)
+add_subdirectory(src/util)
+add_subdirectory(src/feat)
+add_subdirectory(src/tree)
+add_subdirectory(src/gmm)
+add_subdirectory(src/transform)
+add_subdirectory(src/sgmm2)
+add_subdirectory(src/fstext)
+add_subdirectory(src/hmm)
+add_subdirectory(src/lm)
+add_subdirectory(src/decoder)
+add_subdirectory(src/lat)
+add_subdirectory(src/nnet)
+add_subdirectory(src/nnet2)
+add_subdirectory(src/nnet3)
+add_subdirectory(src/rnnlm)
+add_subdirectory(src/chain)
+add_subdirectory(src/ivector)
+add_subdirectory(src/online)
+add_subdirectory(src/online2)
+add_subdirectory(src/kws)
+
+add_subdirectory(src/itf)
+
+if(TENSORFLOW_DIR)
+ add_subdirectory(src/tfrnnlm)
+ add_subdirectory(src/tfrnnlmbin)
+endif()
+
+# add all cuda libraries
+if(CUDA_FOUND)
+ add_subdirectory(src/cudafeat)
+ add_subdirectory(src/cudadecoder)
+endif()
+
+# add all native executables
+add_subdirectory(src/bin)
+add_subdirectory(src/gmmbin)
+add_subdirectory(src/featbin)
+add_subdirectory(src/sgmm2bin)
+add_subdirectory(src/fstbin)
+add_subdirectory(src/lmbin)
+add_subdirectory(src/latbin)
+add_subdirectory(src/nnetbin)
+add_subdirectory(src/nnet2bin)
+add_subdirectory(src/nnet3bin)
+add_subdirectory(src/rnnlmbin)
+add_subdirectory(src/chainbin)
+add_subdirectory(src/ivectorbin)
+add_subdirectory(src/onlinebin)
+add_subdirectory(src/online2bin)
+add_subdirectory(src/kwsbin)
+
+# add all cuda executables
+if(CUDA_FOUND)
+ add_subdirectory(src/cudafeatbin)
+ add_subdirectory(src/cudadecoderbin)
+endif()
+
+include(CMakePackageConfigHelpers)
+# maybe we should put this into subfolder?
+configure_package_config_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/kaldi-config.cmake.in
+ ${CMAKE_BINARY_DIR}/cmake/kaldi-config.cmake
+ INSTALL_DESTINATION lib/cmake/kaldi
+)
+write_basic_package_version_file(
+ ${CMAKE_BINARY_DIR}/cmake/kaldi-config-version.cmake
+ VERSION ${KALDI_VERSION}
+ COMPATIBILITY AnyNewerVersion
+)
+install(FILES ${CMAKE_BINARY_DIR}/cmake/kaldi-config.cmake ${CMAKE_BINARY_DIR}/cmake/kaldi-config-version.cmake
+ DESTINATION lib/cmake/kaldi
+)
+install(EXPORT kaldi-targets DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/cmake/kaldi)
diff --git a/INSTALL b/INSTALL
index 2dbf318118c..7beb79a7336 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,9 +1,16 @@
This is the official Kaldi INSTALL. Look also at INSTALL.md for the git mirror installation.
-[for native Windows install, see windows/INSTALL]
+[Option 1 below does not apply to a native Windows install; see windows/INSTALL or Option 2 below]
-(1)
-go to tools/ and follow INSTALL instructions there.
+Option 1 (bash + makefile):
-(2)
-go to src/ and follow INSTALL instructions there.
+ Steps:
+ (1)
+ go to tools/ and follow INSTALL instructions there.
+ (2)
+ go to src/ and follow INSTALL instructions there.
+
+Option 2 (cmake):
+
+ Go to cmake/ and follow INSTALL.md instructions there.
+  Note: it may not be well tested, and some features are currently missing.
diff --git a/cmake/FindBLAS.cmake b/cmake/FindBLAS.cmake
new file mode 100644
index 00000000000..67676110c6d
--- /dev/null
+++ b/cmake/FindBLAS.cmake
@@ -0,0 +1,816 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindBLAS
+--------
+
+Find Basic Linear Algebra Subprograms (BLAS) library
+
+This module finds an installed Fortran library that implements the
+BLAS linear-algebra interface (see http://www.netlib.org/blas/). The
+list of libraries searched for is taken from the ``autoconf`` macro file,
+``acx_blas.m4`` (distributed at
+http://ac-archive.sourceforge.net/ac-archive/acx_blas.html).
+
+Input Variables
+^^^^^^^^^^^^^^^
+
+The following variables may be set to influence this module's behavior:
+
+``BLA_STATIC``
+ if ``ON`` use static linkage
+
+``BLA_VENDOR``
+ If set, checks only the specified vendor, if not set checks all the
+ possibilities. List of vendors valid in this module:
+
+ * Goto
+ * OpenBLAS
+ * FLAME
+ * ATLAS PhiPACK
+ * CXML
+ * DXML
+ * SunPerf
+ * SCSL
+ * SGIMATH
+ * IBMESSL
+ * Intel10_32 (intel mkl v10 32 bit)
+ * Intel10_64lp (intel mkl v10+ 64 bit, threaded code, lp64 model)
+ * Intel10_64lp_seq (intel mkl v10+ 64 bit, sequential code, lp64 model)
+ * Intel10_64ilp (intel mkl v10+ 64 bit, threaded code, ilp64 model)
+ * Intel10_64ilp_seq (intel mkl v10+ 64 bit, sequential code, ilp64 model)
+ * Intel (obsolete versions of mkl 32 and 64 bit)
+ * ACML
+ * ACML_MP
+ * ACML_GPU
+ * Apple
+ * NAS
+ * Generic
+
+``BLA_F95``
+ if ``ON`` tries to find the BLAS95 interfaces
+
+``BLA_PREFER_PKGCONFIG``
+ if set ``pkg-config`` will be used to search for a BLAS library first
+ and if one is found that is preferred
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This module defines the following variables:
+
+``BLAS_FOUND``
+ library implementing the BLAS interface is found
+``BLAS_LINKER_FLAGS``
+ uncached list of required linker flags (excluding ``-l`` and ``-L``).
+``BLAS_LIBRARIES``
+ uncached list of libraries (using full path name) to link against
+ to use BLAS (may be empty if compiler implicitly links BLAS)
+``BLAS95_LIBRARIES``
+ uncached list of libraries (using full path name) to link against
+ to use BLAS95 interface
+``BLAS95_FOUND``
+ library implementing the BLAS95 interface is found
+
+.. note::
+
+ C or CXX must be enabled to use Intel Math Kernel Library (MKL)
+
+ For example, to use Intel MKL libraries and/or Intel compiler:
+
+ .. code-block:: cmake
+
+ set(BLA_VENDOR Intel10_64lp)
+ find_package(BLAS)
+
+Hints
+^^^^^
+
+Set ``MKLROOT`` environment variable to a directory that contains an MKL
+installation.
+
+#]=======================================================================]
+
+include(CheckFunctionExists)
+include(CheckFortranFunctionExists)
+include(CMakePushCheckState)
+include(FindPackageHandleStandardArgs)
+cmake_push_check_state()
+set(CMAKE_REQUIRED_QUIET ${BLAS_FIND_QUIETLY})
+
+set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
+
+# Check the language being used
+if( NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED) )
+ if(BLAS_FIND_REQUIRED)
+ message(FATAL_ERROR "FindBLAS requires Fortran, C, or C++ to be enabled.")
+ else()
+ message(STATUS "Looking for BLAS... - NOT found (Unsupported languages)")
+ return()
+ endif()
+endif()
+
+if(BLA_PREFER_PKGCONFIG)
+ find_package(PkgConfig)
+ pkg_check_modules(PKGC_BLAS blas)
+ if(PKGC_BLAS_FOUND)
+ set(BLAS_FOUND ${PKGC_BLAS_FOUND})
+ set(BLAS_LIBRARIES "${PKGC_BLAS_LINK_LIBRARIES}")
+ return()
+ endif()
+endif()
+
+macro(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list _thread)
+ # This macro checks for the existence of the combination of fortran libraries
+ # given by _list. If the combination is found, this macro checks (using the
+ # Check_Fortran_Function_Exists macro) whether can link against that library
+ # combination using the name of a routine given by _name using the linker
+ # flags given by _flags. If the combination of libraries is found and passes
+ # the link test, LIBRARIES is set to the list of complete library paths that
+ # have been found. Otherwise, LIBRARIES is set to FALSE.
+
+ # N.B. _prefix is the prefix applied to the names of all cached variables that
+ # are generated internally and marked advanced by this macro.
+
+ set(_libdir ${ARGN})
+
+ set(_libraries_work TRUE)
+ set(${LIBRARIES})
+ set(_combined_name)
+ if (NOT _libdir)
+ if (WIN32)
+ set(_libdir ENV LIB)
+ elseif (APPLE)
+ set(_libdir ENV DYLD_LIBRARY_PATH)
+ else ()
+ set(_libdir ENV LD_LIBRARY_PATH)
+ endif ()
+ endif ()
+
+ list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+
+ foreach(_library ${_list})
+ set(_combined_name ${_combined_name}_${_library})
+ if(NOT "${_thread}" STREQUAL "")
+ set(_combined_name ${_combined_name}_thread)
+ endif()
+ if(_libraries_work)
+ if (BLA_STATIC)
+ if (WIN32)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ endif ()
+ if (APPLE)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ else ()
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ endif ()
+ else ()
+ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ # for ubuntu's libblas3gf and liblapack3gf packages
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf)
+ endif ()
+ endif ()
+ find_library(${_prefix}_${_library}_LIBRARY
+ NAMES ${_library}
+ PATHS ${_libdir}
+ )
+ mark_as_advanced(${_prefix}_${_library}_LIBRARY)
+ set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})
+ set(_libraries_work ${${_prefix}_${_library}_LIBRARY})
+ endif()
+ endforeach()
+ if(_libraries_work)
+ # Test this combination of libraries.
+ set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_thread})
+ # message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}")
+ if (CMAKE_Fortran_COMPILER_LOADED)
+ check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS)
+ else()
+ check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS)
+ endif()
+ set(CMAKE_REQUIRED_LIBRARIES)
+ set(_libraries_work ${${_prefix}${_combined_name}_WORKS})
+ endif()
+ if(_libraries_work)
+ if("${_list}" STREQUAL "")
+ set(${LIBRARIES} "${LIBRARIES}-PLACEHOLDER-FOR-EMPTY-LIBRARIES")
+ else()
+ set(${LIBRARIES} ${${LIBRARIES}} ${_thread}) # for static link
+ endif()
+ else()
+ set(${LIBRARIES} FALSE)
+ endif()
+ #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}")
+endmacro()
+
+set(BLAS_LINKER_FLAGS)
+set(BLAS_LIBRARIES)
+set(BLAS95_LIBRARIES)
+if (NOT $ENV{BLA_VENDOR} STREQUAL "")
+ set(BLA_VENDOR $ENV{BLA_VENDOR})
+else ()
+ if(NOT BLA_VENDOR)
+ set(BLA_VENDOR "All")
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # Implicitly linked BLAS libraries
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ ""
+ ""
+ )
+ endif()
+endif ()
+
+#BLAS in intel mkl 10+ library? (em64t 64bit)
+if (BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All")
+ if (NOT BLAS_LIBRARIES)
+
+ # System-specific settings
+ if (WIN32)
+ if (BLA_STATIC)
+ set(BLAS_mkl_DLL_SUFFIX "")
+ else()
+ set(BLAS_mkl_DLL_SUFFIX "_dll")
+ endif()
+ else()
+ # Switch to GNU Fortran support layer if needed (but not on Apple, where MKL does not provide it)
+ if(CMAKE_Fortran_COMPILER_LOADED AND CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" AND NOT APPLE)
+ set(BLAS_mkl_INTFACE "gf")
+ set(BLAS_mkl_THREADING "gnu")
+ set(BLAS_mkl_OMP "gomp")
+ else()
+ set(BLAS_mkl_INTFACE "intel")
+ set(BLAS_mkl_THREADING "intel")
+ set(BLAS_mkl_OMP "iomp5")
+ endif()
+ set(BLAS_mkl_LM "-lm")
+ set(BLAS_mkl_LDL "-ldl")
+ endif()
+
+ if (BLA_VENDOR MATCHES "_64ilp")
+ set(BLAS_mkl_ILP_MODE "ilp64")
+ else ()
+ set(BLAS_mkl_ILP_MODE "lp64")
+ endif ()
+
+ if (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)
+ if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED)
+ find_package(Threads)
+ else()
+ find_package(Threads REQUIRED)
+ endif()
+
+ set(BLAS_SEARCH_LIBS "")
+
+ if(BLA_F95)
+ set(BLAS_mkl_SEARCH_SYMBOL sgemm_f95)
+ set(_LIBRARIES BLAS95_LIBRARIES)
+ if (WIN32)
+ # Find the main file (32-bit or 64-bit)
+ set(BLAS_SEARCH_LIBS_WIN_MAIN "")
+ if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN
+ "mkl_blas95${BLAS_mkl_DLL_SUFFIX} mkl_intel_c${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN
+ "mkl_blas95_${BLAS_mkl_ILP_MODE}${BLAS_mkl_DLL_SUFFIX} mkl_intel_${BLAS_mkl_ILP_MODE}${BLAS_mkl_DLL_SUFFIX}")
+ endif ()
+
+ # Add threading/sequential libs
+ set(BLAS_SEARCH_LIBS_WIN_THREAD "")
+ if (BLA_VENDOR MATCHES "_seq$" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "mkl_sequential${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+ if (NOT BLA_VENDOR MATCHES "_seq$" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}")
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+
+ # Cartesian product of the above
+ foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN})
+ foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD})
+ list(APPEND BLAS_SEARCH_LIBS
+ "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}")
+ endforeach()
+ endforeach()
+ else ()
+ if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95 mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide")
+
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95 mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_OMP}")
+ endif ()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95 mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide")
+
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_OMP}")
+ endif ()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_blas95_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_sequential mkl_core")
+ endif ()
+ endif ()
+ else ()
+ set(BLAS_mkl_SEARCH_SYMBOL sgemm)
+ set(_LIBRARIES BLAS_LIBRARIES)
+ if (WIN32)
+ # Find the main file (32-bit or 64-bit)
+ set(BLAS_SEARCH_LIBS_WIN_MAIN "")
+ if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN
+ "mkl_intel_c${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN
+ "mkl_intel_${BLAS_mkl_ILP_MODE}${BLAS_mkl_DLL_SUFFIX}")
+ endif ()
+
+ # Add threading/sequential libs
+ set(BLAS_SEARCH_LIBS_WIN_THREAD "")
+ if (NOT BLA_VENDOR MATCHES "_seq$" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}")
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+ if (BLA_VENDOR MATCHES "_seq$" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD
+ "mkl_sequential${BLAS_mkl_DLL_SUFFIX}")
+ endif()
+
+ # Cartesian product of the above
+ foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN})
+ foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD})
+ list(APPEND BLAS_SEARCH_LIBS
+ "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}")
+ endforeach()
+ endforeach()
+ else ()
+ if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide")
+
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_OMP}")
+ endif ()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR STREQUAL "All")
+ # old version
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide")
+
+ # mkl >= 10.3
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_OMP}")
+ endif ()
+ if (BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_sequential mkl_core")
+ endif ()
+
+      # older versions of intel mkl libs
+ if (BLA_VENDOR STREQUAL "Intel" OR BLA_VENDOR STREQUAL "All")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_ia32")
+ list(APPEND BLAS_SEARCH_LIBS
+ "mkl_em64t")
+ endif ()
+ endif ()
+ endif ()
+
+ if (DEFINED ENV{MKLROOT})
+ if (BLA_VENDOR STREQUAL "Intel10_32")
+ set(_BLAS_MKLROOT_LIB_DIR "$ENV{MKLROOT}/lib/ia32")
+ elseif (BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$")
+ set(_BLAS_MKLROOT_LIB_DIR "$ENV{MKLROOT}/lib/intel64")
+ endif ()
+ endif ()
+ if (_BLAS_MKLROOT_LIB_DIR)
+ if (WIN32)
+ string(APPEND _BLAS_MKLROOT_LIB_DIR "_win")
+ elseif (APPLE)
+ string(APPEND _BLAS_MKLROOT_LIB_DIR "_mac")
+ else ()
+ string(APPEND _BLAS_MKLROOT_LIB_DIR "_lin")
+ endif ()
+ endif ()
+
+ foreach (IT ${BLAS_SEARCH_LIBS})
+ string(REPLACE " " ";" SEARCH_LIBS ${IT})
+ if (NOT ${_LIBRARIES})
+ check_fortran_libraries(
+ ${_LIBRARIES}
+ BLAS
+ ${BLAS_mkl_SEARCH_SYMBOL}
+ ""
+ "${SEARCH_LIBS}"
+ "${CMAKE_THREAD_LIBS_INIT};${BLAS_mkl_LM};${BLAS_mkl_LDL}"
+ "${_BLAS_MKLROOT_LIB_DIR}"
+ )
+ endif ()
+ endforeach ()
+
+ endif ()
+ unset(BLAS_mkl_ILP_MODE)
+ unset(BLAS_mkl_INTFACE)
+ unset(BLAS_mkl_THREADING)
+ unset(BLAS_mkl_OMP)
+ unset(BLAS_mkl_DLL_SUFFIX)
+ unset(BLAS_mkl_LM)
+ unset(BLAS_mkl_LDL)
+ endif ()
+endif ()
+
+if(BLA_F95)
+ find_package_handle_standard_args(BLAS REQUIRED_VARS BLAS95_LIBRARIES)
+ set(BLAS95_FOUND ${BLAS_FOUND})
+ if(BLAS_FOUND)
+ set(BLAS_LIBRARIES "${BLAS95_LIBRARIES}")
+ endif()
+endif()
+
+if (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # gotoblas (http://www.tacc.utexas.edu/tacc-projects/gotoblas2)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "goto2"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # OpenBLAS (http://www.openblas.net)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "openblas"
+ ""
+ )
+ endif()
+ if(NOT BLAS_LIBRARIES)
+ find_package(Threads)
+ # OpenBLAS (http://www.openblas.net)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "openblas"
+ "${CMAKE_THREAD_LIBS_INIT}"
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "FLAME" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # FLAME's blis library (https://github.com/flame/blis)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "blis"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ dgemm
+ ""
+ "f77blas;atlas"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in PhiPACK libraries? (requires generic BLAS lib, too)
+if (BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "sgemm;dgemm;blas"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in Alpha CXML library?
+if (BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "cxml"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in Alpha DXML library? (now called CXML, see above)
+if (BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "dxml"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in Sun Performance library?
+if (BLA_VENDOR STREQUAL "SunPerf" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ "-xlic_lib=sunperf"
+ "sunperf;sunmath"
+ ""
+ )
+ if(BLAS_LIBRARIES)
+ set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf")
+ endif()
+ endif()
+endif ()
+
+# BLAS in SCSL library? (SGI/Cray Scientific Library)
+if (BLA_VENDOR STREQUAL "SCSL" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "scsl"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in SGIMATH library?
+if (BLA_VENDOR STREQUAL "SGIMATH" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "complib.sgimath"
+ ""
+ )
+ endif()
+endif ()
+
+# BLAS in IBM ESSL library? (requires generic BLAS lib, too)
+if (BLA_VENDOR STREQUAL "IBMESSL" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "essl;blas"
+ ""
+ )
+ endif()
+endif ()
+
+#BLAS in acml library?
+if (BLA_VENDOR MATCHES "ACML" OR BLA_VENDOR STREQUAL "All")
+ if( ((BLA_VENDOR STREQUAL "ACML") AND (NOT BLAS_ACML_LIB_DIRS)) OR
+ ((BLA_VENDOR STREQUAL "ACML_MP") AND (NOT BLAS_ACML_MP_LIB_DIRS)) OR
+ ((BLA_VENDOR STREQUAL "ACML_GPU") AND (NOT BLAS_ACML_GPU_LIB_DIRS))
+ )
+ # try to find acml in "standard" paths
+ if( WIN32 )
+ file( GLOB _ACML_ROOT "C:/AMD/acml*/ACML-EULA.txt" )
+ else()
+ file( GLOB _ACML_ROOT "/opt/acml*/ACML-EULA.txt" )
+ endif()
+ if( WIN32 )
+ file( GLOB _ACML_GPU_ROOT "C:/AMD/acml*/GPGPUexamples" )
+ else()
+ file( GLOB _ACML_GPU_ROOT "/opt/acml*/GPGPUexamples" )
+ endif()
+ list(GET _ACML_ROOT 0 _ACML_ROOT)
+ list(GET _ACML_GPU_ROOT 0 _ACML_GPU_ROOT)
+ if( _ACML_ROOT )
+ get_filename_component( _ACML_ROOT ${_ACML_ROOT} PATH )
+ if( SIZEOF_INTEGER EQUAL 8 )
+ set( _ACML_PATH_SUFFIX "_int64" )
+ else()
+ set( _ACML_PATH_SUFFIX "" )
+ endif()
+ if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" )
+ set( _ACML_COMPILER32 "ifort32" )
+ set( _ACML_COMPILER64 "ifort64" )
+ elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "SunPro" )
+ set( _ACML_COMPILER32 "sun32" )
+ set( _ACML_COMPILER64 "sun64" )
+ elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
+ set( _ACML_COMPILER32 "pgi32" )
+ if( WIN32 )
+ set( _ACML_COMPILER64 "win64" )
+ else()
+ set( _ACML_COMPILER64 "pgi64" )
+ endif()
+ elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "Open64" )
+ # 32 bit builds not supported on Open64 but for code simplicity
+ # We'll just use the same directory twice
+ set( _ACML_COMPILER32 "open64_64" )
+ set( _ACML_COMPILER64 "open64_64" )
+ elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "NAG" )
+ set( _ACML_COMPILER32 "nag32" )
+ set( _ACML_COMPILER64 "nag64" )
+ else()
+ set( _ACML_COMPILER32 "gfortran32" )
+ set( _ACML_COMPILER64 "gfortran64" )
+ endif()
+
+ if( BLA_VENDOR STREQUAL "ACML_MP" )
+ set(_ACML_MP_LIB_DIRS
+ "${_ACML_ROOT}/${_ACML_COMPILER32}_mp${_ACML_PATH_SUFFIX}/lib"
+ "${_ACML_ROOT}/${_ACML_COMPILER64}_mp${_ACML_PATH_SUFFIX}/lib" )
+ else()
+ set(_ACML_LIB_DIRS
+ "${_ACML_ROOT}/${_ACML_COMPILER32}${_ACML_PATH_SUFFIX}/lib"
+ "${_ACML_ROOT}/${_ACML_COMPILER64}${_ACML_PATH_SUFFIX}/lib" )
+ endif()
+ endif()
+elseif(BLAS_${BLA_VENDOR}_LIB_DIRS)
+ set(_${BLA_VENDOR}_LIB_DIRS ${BLAS_${BLA_VENDOR}_LIB_DIRS})
+endif()
+
+if( BLA_VENDOR STREQUAL "ACML_MP" )
+ foreach( BLAS_ACML_MP_LIB_DIRS ${_ACML_MP_LIB_DIRS})
+ check_fortran_libraries (
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ "" "acml_mp;acml_mv" "" ${BLAS_ACML_MP_LIB_DIRS}
+ )
+ if( BLAS_LIBRARIES )
+ break()
+ endif()
+ endforeach()
+elseif( BLA_VENDOR STREQUAL "ACML_GPU" )
+ foreach( BLAS_ACML_GPU_LIB_DIRS ${_ACML_GPU_LIB_DIRS})
+ check_fortran_libraries (
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ "" "acml;acml_mv;CALBLAS" "" ${BLAS_ACML_GPU_LIB_DIRS}
+ )
+ if( BLAS_LIBRARIES )
+ break()
+ endif()
+ endforeach()
+else()
+ foreach( BLAS_ACML_LIB_DIRS ${_ACML_LIB_DIRS} )
+ check_fortran_libraries (
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ "" "acml;acml_mv" "" ${BLAS_ACML_LIB_DIRS}
+ )
+ if( BLAS_LIBRARIES )
+ break()
+ endif()
+ endforeach()
+endif()
+
+# Either acml or acml_mp should be in LD_LIBRARY_PATH but not both
+if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "acml;acml_mv"
+ ""
+ )
+endif()
+if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "acml_mp;acml_mv"
+ ""
+ )
+endif()
+if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "acml;acml_mv;CALBLAS"
+ ""
+ )
+endif()
+endif () # ACML
+
+# Apple BLAS library?
+if (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ dgemm
+ ""
+ "Accelerate"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All")
+ if ( NOT BLAS_LIBRARIES )
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ dgemm
+ ""
+ "vecLib"
+ ""
+ )
+ endif ()
+endif ()
+
+# Generic BLAS library?
+if (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All")
+ if(NOT BLAS_LIBRARIES)
+ check_fortran_libraries(
+ BLAS_LIBRARIES
+ BLAS
+ sgemm
+ ""
+ "blas"
+ ""
+ )
+ endif()
+endif ()
+
+if(NOT BLA_F95)
+ find_package_handle_standard_args(BLAS REQUIRED_VARS BLAS_LIBRARIES)
+endif()
+
+# On compilers that implicitly link BLAS (such as ftn, cc, and CC on Cray HPC machines)
+# we used a placeholder for empty BLAS_LIBRARIES to get through our logic above.
+if (BLAS_LIBRARIES STREQUAL "BLAS_LIBRARIES-PLACEHOLDER-FOR-EMPTY-LIBRARIES")
+ set(BLAS_LIBRARIES "")
+endif()
+
+cmake_pop_check_state()
+set(CMAKE_FIND_LIBRARY_SUFFIXES ${_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
diff --git a/cmake/FindCUB.cmake b/cmake/FindCUB.cmake
new file mode 100644
index 00000000000..33c8a926f97
--- /dev/null
+++ b/cmake/FindCUB.cmake
@@ -0,0 +1,25 @@
+# Try to find the CUB library and headers.
+# CUB_ROOT_DIR - where to look for CUB
+
+# CUB_FOUND - system has CUB
+# CUB_INCLUDE_DIRS - the CUB include directory
+
+
+find_path(CUB_INCLUDE_DIR
+ NAMES cub/cub.cuh
+ HINTS ${CUB_ROOT_DIR}
+ DOC "The directory where CUB includes reside"
+)
+
+set(CUB_INCLUDE_DIRS ${CUB_INCLUDE_DIR})
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CUB
+ FOUND_VAR CUB_FOUND
+ REQUIRED_VARS CUB_INCLUDE_DIR
+)
+
+mark_as_advanced(CUB_FOUND)
+
+add_library(CUB INTERFACE)
+target_include_directories(CUB INTERFACE ${CUB_INCLUDE_DIR})
diff --git a/cmake/FindICU.cmake b/cmake/FindICU.cmake
new file mode 100644
index 00000000000..8c460082c36
--- /dev/null
+++ b/cmake/FindICU.cmake
@@ -0,0 +1,428 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindICU
+-------
+
+Find the International Components for Unicode (ICU) libraries and
+programs.
+
+This module supports multiple components.
+Components can include any of: ``data``, ``i18n``, ``io``, ``le``,
+``lx``, ``test``, ``tu`` and ``uc``.
+
+Note that on Windows ``data`` is named ``dt`` and ``i18n`` is named
+``in``; any of the names may be used, and the appropriate
+platform-specific library name will be automatically selected.
+
+This module reports information about the ICU installation in
+several variables. General variables::
+
+ ICU_VERSION - ICU release version
+ ICU_FOUND - true if the main programs and libraries were found
+ ICU_LIBRARIES - component libraries to be linked
+ ICU_INCLUDE_DIRS - the directories containing the ICU headers
+
+Imported targets::
+
+  ICU::<C>
+
+Where ``<C>`` is the name of an ICU component, for example
+``ICU::i18n``.
+
+ICU programs are reported in::
+
+ ICU_GENCNVAL_EXECUTABLE - path to gencnval executable
+ ICU_ICUINFO_EXECUTABLE - path to icuinfo executable
+ ICU_GENBRK_EXECUTABLE - path to genbrk executable
+ ICU_ICU-CONFIG_EXECUTABLE - path to icu-config executable
+ ICU_GENRB_EXECUTABLE - path to genrb executable
+ ICU_GENDICT_EXECUTABLE - path to gendict executable
+ ICU_DERB_EXECUTABLE - path to derb executable
+ ICU_PKGDATA_EXECUTABLE - path to pkgdata executable
+ ICU_UCONV_EXECUTABLE - path to uconv executable
+ ICU_GENCFU_EXECUTABLE - path to gencfu executable
+ ICU_MAKECONV_EXECUTABLE - path to makeconv executable
+ ICU_GENNORM2_EXECUTABLE - path to gennorm2 executable
+ ICU_GENCCODE_EXECUTABLE - path to genccode executable
+ ICU_GENSPREP_EXECUTABLE - path to gensprep executable
+ ICU_ICUPKG_EXECUTABLE - path to icupkg executable
+ ICU_GENCMN_EXECUTABLE - path to gencmn executable
+
+ICU component libraries are reported in::
+
+  ICU_<C>_FOUND - ON if component was found
+  ICU_<C>_LIBRARIES - libraries for component
+
+ICU datafiles are reported in::
+
+ ICU_MAKEFILE_INC - Makefile.inc
+ ICU_PKGDATA_INC - pkgdata.inc
+
+Note that ``<C>`` is the uppercased name of the component.
+
+This module reads hints about search results from::
+
+ ICU_ROOT - the root of the ICU installation
+
+The environment variable ``ICU_ROOT`` may also be used; the
+ICU_ROOT variable takes precedence.
+
+The following cache variables may also be set::
+
+  ICU_<P>_EXECUTABLE - the path to the <P> executable
+  ICU_INCLUDE_DIR - the directory containing the ICU headers
+  ICU_<C>_LIBRARY - the library for the <C> component
+
+.. note::
+
+ In most cases none of the above variables will require setting,
+ unless multiple ICU versions are available and a specific version
+ is required.
+
+Other variables one may set to control this module are::
+
+ ICU_DEBUG - Set to ON to enable debug output from FindICU.
+#]=======================================================================]
+
+# Written by Roger Leigh
+
+set(icu_programs
+ gencnval
+ icuinfo
+ genbrk
+ icu-config
+ genrb
+ gendict
+ derb
+ pkgdata
+ uconv
+ gencfu
+ makeconv
+ gennorm2
+ genccode
+ gensprep
+ icupkg
+ gencmn)
+
+set(icu_data
+ Makefile.inc
+ pkgdata.inc)
+
+# The ICU checks are contained in a function due to the large number
+# of temporary variables needed.
+function(_ICU_FIND)
+ # Set up search paths, taking compiler into account. Search ICU_ROOT,
+ # with ICU_ROOT in the environment as a fallback if unset.
+ if(ICU_ROOT)
+ list(APPEND icu_roots "${ICU_ROOT}")
+ else()
+ if(NOT "$ENV{ICU_ROOT}" STREQUAL "")
+ file(TO_CMAKE_PATH "$ENV{ICU_ROOT}" NATIVE_PATH)
+ list(APPEND icu_roots "${NATIVE_PATH}")
+ set(ICU_ROOT "${NATIVE_PATH}"
+ CACHE PATH "Location of the ICU installation" FORCE)
+ endif()
+ endif()
+
+ # Find include directory
+ list(APPEND icu_include_suffixes "include")
+ find_path(ICU_INCLUDE_DIR
+ NAMES "unicode/utypes.h"
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_include_suffixes}
+ DOC "ICU include directory")
+ set(ICU_INCLUDE_DIR "${ICU_INCLUDE_DIR}" PARENT_SCOPE)
+
+ # Get version
+ if(ICU_INCLUDE_DIR AND EXISTS "${ICU_INCLUDE_DIR}/unicode/uvernum.h")
+ file(STRINGS "${ICU_INCLUDE_DIR}/unicode/uvernum.h" icu_header_str
+ REGEX "^#define[\t ]+U_ICU_VERSION[\t ]+\".*\".*")
+
+ string(REGEX REPLACE "^#define[\t ]+U_ICU_VERSION[\t ]+\"([^ \\n]*)\".*"
+ "\\1" icu_version_string "${icu_header_str}")
+ set(ICU_VERSION "${icu_version_string}")
+ set(ICU_VERSION "${icu_version_string}" PARENT_SCOPE)
+ unset(icu_header_str)
+ unset(icu_version_string)
+ endif()
+
+ if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ # 64-bit binary directory
+ set(_bin64 "bin64")
+ # 64-bit library directory
+ set(_lib64 "lib64")
+ endif()
+
+
+ # Find all ICU programs
+ list(APPEND icu_binary_suffixes "${_bin64}" "bin" "sbin")
+ foreach(program ${icu_programs})
+ string(TOUPPER "${program}" program_upcase)
+ set(cache_var "ICU_${program_upcase}_EXECUTABLE")
+ set(program_var "ICU_${program_upcase}_EXECUTABLE")
+ find_program("${cache_var}"
+ NAMES "${program}"
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_binary_suffixes}
+ DOC "ICU ${program} executable"
+ NO_PACKAGE_ROOT_PATH
+ )
+ mark_as_advanced(cache_var)
+ set("${program_var}" "${${cache_var}}" PARENT_SCOPE)
+ endforeach()
+
+ # Find all ICU libraries
+ list(APPEND icu_library_suffixes "${_lib64}" "lib")
+ set(ICU_REQUIRED_LIBS_FOUND ON)
+ set(static_prefix )
+ # static icu libraries compiled with MSVC have the prefix 's'
+ if(MSVC)
+ set(static_prefix "s")
+ endif()
+ foreach(component ${ICU_FIND_COMPONENTS})
+ string(TOUPPER "${component}" component_upcase)
+ set(component_cache "ICU_${component_upcase}_LIBRARY")
+ set(component_cache_release "${component_cache}_RELEASE")
+ set(component_cache_debug "${component_cache}_DEBUG")
+ set(component_found "${component_upcase}_FOUND")
+ set(component_libnames "icu${component}")
+ set(component_debug_libnames "icu${component}d")
+
+ # Special case deliberate library naming mismatches between Unix
+ # and Windows builds
+ unset(component_libnames)
+ unset(component_debug_libnames)
+ list(APPEND component_libnames "icu${component}")
+ list(APPEND component_debug_libnames "icu${component}d")
+ if(component STREQUAL "data")
+ list(APPEND component_libnames "icudt")
+ # Note there is no debug variant at present
+ list(APPEND component_debug_libnames "icudtd")
+ endif()
+ if(component STREQUAL "dt")
+ list(APPEND component_libnames "icudata")
+ # Note there is no debug variant at present
+ list(APPEND component_debug_libnames "icudatad")
+ endif()
+ if(component STREQUAL "i18n")
+ list(APPEND component_libnames "icuin")
+ list(APPEND component_debug_libnames "icuind")
+ endif()
+ if(component STREQUAL "in")
+ list(APPEND component_libnames "icui18n")
+ list(APPEND component_debug_libnames "icui18nd")
+ endif()
+
+ if(static_prefix)
+ unset(static_component_libnames)
+ unset(static_component_debug_libnames)
+ foreach(component_libname ${component_libnames})
+ list(APPEND static_component_libnames
+ ${static_prefix}${component_libname})
+ endforeach()
+ foreach(component_libname ${component_debug_libnames})
+ list(APPEND static_component_debug_libnames
+ ${static_prefix}${component_libname})
+ endforeach()
+ list(APPEND component_libnames ${static_component_libnames})
+ list(APPEND component_debug_libnames ${static_component_debug_libnames})
+ endif()
+ find_library("${component_cache_release}"
+ NAMES ${component_libnames}
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_library_suffixes}
+ DOC "ICU ${component} library (release)"
+ NO_PACKAGE_ROOT_PATH
+ )
+ find_library("${component_cache_debug}"
+ NAMES ${component_debug_libnames}
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_library_suffixes}
+ DOC "ICU ${component} library (debug)"
+ NO_PACKAGE_ROOT_PATH
+ )
+ include(SelectLibraryConfigurations)
+ select_library_configurations(ICU_${component_upcase})
+ mark_as_advanced("${component_cache_release}" "${component_cache_debug}")
+ if(${component_cache})
+ set("${component_found}" ON)
+ list(APPEND ICU_LIBRARY "${${component_cache}}")
+ endif()
+ mark_as_advanced("${component_found}")
+ set("${component_cache}" "${${component_cache}}" PARENT_SCOPE)
+ set("${component_found}" "${${component_found}}" PARENT_SCOPE)
+ if(${component_found})
+ if (ICU_FIND_REQUIRED_${component})
+ list(APPEND ICU_LIBS_FOUND "${component} (required)")
+ else()
+ list(APPEND ICU_LIBS_FOUND "${component} (optional)")
+ endif()
+ else()
+ if (ICU_FIND_REQUIRED_${component})
+ set(ICU_REQUIRED_LIBS_FOUND OFF)
+ list(APPEND ICU_LIBS_NOTFOUND "${component} (required)")
+ else()
+ list(APPEND ICU_LIBS_NOTFOUND "${component} (optional)")
+ endif()
+ endif()
+ endforeach()
+ set(_ICU_REQUIRED_LIBS_FOUND "${ICU_REQUIRED_LIBS_FOUND}" PARENT_SCOPE)
+ set(ICU_LIBRARY "${ICU_LIBRARY}" PARENT_SCOPE)
+
+ # Find all ICU data files
+ if(CMAKE_LIBRARY_ARCHITECTURE)
+ list(APPEND icu_data_suffixes
+ "${_lib64}/${CMAKE_LIBRARY_ARCHITECTURE}/icu/${ICU_VERSION}"
+ "lib/${CMAKE_LIBRARY_ARCHITECTURE}/icu/${ICU_VERSION}"
+ "${_lib64}/${CMAKE_LIBRARY_ARCHITECTURE}/icu"
+ "lib/${CMAKE_LIBRARY_ARCHITECTURE}/icu")
+ endif()
+ list(APPEND icu_data_suffixes
+ "${_lib64}/icu/${ICU_VERSION}"
+ "lib/icu/${ICU_VERSION}"
+ "${_lib64}/icu"
+ "lib/icu")
+ foreach(data ${icu_data})
+ string(TOUPPER "${data}" data_upcase)
+ string(REPLACE "." "_" data_upcase "${data_upcase}")
+ set(cache_var "ICU_${data_upcase}")
+ set(data_var "ICU_${data_upcase}")
+ find_file("${cache_var}"
+ NAMES "${data}"
+ HINTS ${icu_roots}
+ PATH_SUFFIXES ${icu_data_suffixes}
+ DOC "ICU ${data} data file")
+ mark_as_advanced(cache_var)
+ set("${data_var}" "${${cache_var}}" PARENT_SCOPE)
+ endforeach()
+
+ if(NOT ICU_FIND_QUIETLY)
+ if(ICU_LIBS_FOUND)
+ message(STATUS "Found the following ICU libraries:")
+ foreach(found ${ICU_LIBS_FOUND})
+ message(STATUS " ${found}")
+ endforeach()
+ endif()
+ if(ICU_LIBS_NOTFOUND)
+ message(STATUS "The following ICU libraries were not found:")
+ foreach(notfound ${ICU_LIBS_NOTFOUND})
+ message(STATUS " ${notfound}")
+ endforeach()
+ endif()
+ endif()
+
+ if(ICU_DEBUG)
+ message(STATUS "--------FindICU.cmake search debug--------")
+ message(STATUS "ICU binary path search order: ${icu_roots}")
+ message(STATUS "ICU include path search order: ${icu_roots}")
+ message(STATUS "ICU library path search order: ${icu_roots}")
+ message(STATUS "----------------")
+ endif()
+endfunction()
+
+_ICU_FIND()
+
+include(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(ICU
+ FOUND_VAR ICU_FOUND
+ REQUIRED_VARS ICU_INCLUDE_DIR
+ ICU_LIBRARY
+ _ICU_REQUIRED_LIBS_FOUND
+ VERSION_VAR ICU_VERSION
+ FAIL_MESSAGE "Failed to find all ICU components")
+
+unset(_ICU_REQUIRED_LIBS_FOUND)
+
+if(ICU_FOUND)
+ set(ICU_INCLUDE_DIRS "${ICU_INCLUDE_DIR}")
+ set(ICU_LIBRARIES "${ICU_LIBRARY}")
+ foreach(_ICU_component ${ICU_FIND_COMPONENTS})
+ string(TOUPPER "${_ICU_component}" _ICU_component_upcase)
+ set(_ICU_component_cache "ICU_${_ICU_component_upcase}_LIBRARY")
+ set(_ICU_component_cache_release "ICU_${_ICU_component_upcase}_LIBRARY_RELEASE")
+ set(_ICU_component_cache_debug "ICU_${_ICU_component_upcase}_LIBRARY_DEBUG")
+ set(_ICU_component_lib "ICU_${_ICU_component_upcase}_LIBRARIES")
+ set(_ICU_component_found "${_ICU_component_upcase}_FOUND")
+ set(_ICU_imported_target "ICU::${_ICU_component}")
+ if(${_ICU_component_found})
+ set("${_ICU_component_lib}" "${${_ICU_component_cache}}")
+ if(NOT TARGET ${_ICU_imported_target})
+ add_library(${_ICU_imported_target} UNKNOWN IMPORTED)
+ if(ICU_INCLUDE_DIR)
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ INTERFACE_INCLUDE_DIRECTORIES "${ICU_INCLUDE_DIR}")
+ endif()
+ if(EXISTS "${${_ICU_component_cache}}")
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+ IMPORTED_LOCATION "${${_ICU_component_cache}}")
+ endif()
+ if(EXISTS "${${_ICU_component_cache_release}}")
+ set_property(TARGET ${_ICU_imported_target} APPEND PROPERTY
+ IMPORTED_CONFIGURATIONS RELEASE)
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX"
+ IMPORTED_LOCATION_RELEASE "${${_ICU_component_cache_release}}")
+ endif()
+ if(EXISTS "${${_ICU_component_cache_debug}}")
+ set_property(TARGET ${_ICU_imported_target} APPEND PROPERTY
+ IMPORTED_CONFIGURATIONS DEBUG)
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "CXX"
+ IMPORTED_LOCATION_DEBUG "${${_ICU_component_cache_debug}}")
+ endif()
+ if(CMAKE_DL_LIBS AND _ICU_component STREQUAL "uc")
+ set_target_properties(${_ICU_imported_target} PROPERTIES
+ INTERFACE_LINK_LIBRARIES "${CMAKE_DL_LIBS}")
+ endif()
+ endif()
+ endif()
+ unset(_ICU_component_upcase)
+ unset(_ICU_component_cache)
+ unset(_ICU_component_lib)
+ unset(_ICU_component_found)
+ unset(_ICU_imported_target)
+ endforeach()
+endif()
+
+if(ICU_DEBUG)
+ message(STATUS "--------FindICU.cmake results debug--------")
+ message(STATUS "ICU found: ${ICU_FOUND}")
+ message(STATUS "ICU_VERSION number: ${ICU_VERSION}")
+ message(STATUS "ICU_ROOT directory: ${ICU_ROOT}")
+ message(STATUS "ICU_INCLUDE_DIR directory: ${ICU_INCLUDE_DIR}")
+ message(STATUS "ICU_LIBRARIES: ${ICU_LIBRARIES}")
+
+ foreach(program IN LISTS icu_programs)
+ string(TOUPPER "${program}" program_upcase)
+ set(program_lib "ICU_${program_upcase}_EXECUTABLE")
+ message(STATUS "${program} program: ${${program_lib}}")
+ unset(program_upcase)
+ unset(program_lib)
+ endforeach()
+
+ foreach(data IN LISTS icu_data)
+ string(TOUPPER "${data}" data_upcase)
+ string(REPLACE "." "_" data_upcase "${data_upcase}")
+ set(data_lib "ICU_${data_upcase}")
+ message(STATUS "${data} data: ${${data_lib}}")
+ unset(data_upcase)
+ unset(data_lib)
+ endforeach()
+
+ foreach(component IN LISTS ICU_FIND_COMPONENTS)
+ string(TOUPPER "${component}" component_upcase)
+ set(component_lib "ICU_${component_upcase}_LIBRARIES")
+ set(component_found "${component_upcase}_FOUND")
+ message(STATUS "${component} library found: ${${component_found}}")
+ message(STATUS "${component} library: ${${component_lib}}")
+ unset(component_upcase)
+ unset(component_lib)
+ unset(component_found)
+ endforeach()
+ message(STATUS "----------------")
+endif()
+
+unset(icu_programs)
diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake
new file mode 100644
index 00000000000..60fbf0726a0
--- /dev/null
+++ b/cmake/FindLAPACK.cmake
@@ -0,0 +1,430 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindLAPACK
+----------
+
+Find Linear Algebra PACKage (LAPACK) library
+
+This module finds an installed fortran library that implements the
+LAPACK linear-algebra interface (see http://www.netlib.org/lapack/).
+
+The approach follows that taken for the autoconf macro file,
+``acx_lapack.m4`` (distributed at
+http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html).
+
+Input Variables
+^^^^^^^^^^^^^^^
+
+The following variables may be set to influence this module's behavior:
+
+``BLA_STATIC``
+ if ``ON`` use static linkage
+
+``BLA_VENDOR``
+ If set, checks only the specified vendor, if not set checks all the
+ possibilities. List of vendors valid in this module:
+
+ * ``Intel10_32`` (intel mkl v10 32 bit)
+ * ``Intel10_64lp`` (intel mkl v10+ 64 bit, threaded code, lp64 model)
+ * ``Intel10_64lp_seq`` (intel mkl v10+ 64 bit, sequential code, lp64 model)
+ * ``Intel10_64ilp`` (intel mkl v10+ 64 bit, threaded code, ilp64 model)
+ * ``Intel10_64ilp_seq`` (intel mkl v10+ 64 bit, sequential code, ilp64 model)
+ * ``Intel`` (obsolete versions of mkl 32 and 64 bit)
+ * ``OpenBLAS``
+ * ``FLAME``
+ * ``ACML``
+ * ``Apple``
+ * ``NAS``
+ * ``Generic``
+
+``BLA_F95``
+ if ``ON`` tries to find BLAS95/LAPACK95
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This module defines the following variables:
+
+``LAPACK_FOUND``
+ library implementing the LAPACK interface is found
+``LAPACK_LINKER_FLAGS``
+ uncached list of required linker flags (excluding -l and -L).
+``LAPACK_LIBRARIES``
+ uncached list of libraries (using full path name) to link against
+ to use LAPACK
+``LAPACK95_LIBRARIES``
+ uncached list of libraries (using full path name) to link against
+ to use LAPACK95
+``LAPACK95_FOUND``
+ library implementing the LAPACK95 interface is found
+
+.. note::
+
+ C or CXX must be enabled to use Intel MKL
+
+ For example, to use Intel MKL libraries and/or Intel compiler:
+
+ .. code-block:: cmake
+
+ set(BLA_VENDOR Intel10_64lp)
+ find_package(LAPACK)
+#]=======================================================================]
+
+set(_lapack_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
+
+# Check the language being used
+if( NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED) )
+ if(LAPACK_FIND_REQUIRED)
+ message(FATAL_ERROR "FindLAPACK requires Fortran, C, or C++ to be enabled.")
+ else()
+ message(STATUS "Looking for LAPACK... - NOT found (Unsupported languages)")
+ return()
+ endif()
+endif()
+
+if (CMAKE_Fortran_COMPILER_LOADED)
+include(CheckFortranFunctionExists)
+else ()
+include(CheckFunctionExists)
+endif ()
+include(CMakePushCheckState)
+
+cmake_push_check_state()
+set(CMAKE_REQUIRED_QUIET ${LAPACK_FIND_QUIETLY})
+
+set(LAPACK_FOUND FALSE)
+set(LAPACK95_FOUND FALSE)
+
+# TODO: move this stuff to separate module
+
+macro(Check_Lapack_Libraries LIBRARIES _prefix _name _flags _list _blas _threads)
+# This macro checks for the existence of the combination of fortran libraries
+# given by _list. If the combination is found, this macro checks (using the
+# Check_Fortran_Function_Exists macro) whether can link against that library
+# combination using the name of a routine given by _name using the linker
+# flags given by _flags. If the combination of libraries is found and passes
+# the link test, LIBRARIES is set to the list of complete library paths that
+# have been found. Otherwise, LIBRARIES is set to FALSE.
+
+# N.B. _prefix is the prefix applied to the names of all cached variables that
+# are generated internally and marked advanced by this macro.
+
+set(_libraries_work TRUE)
+set(${LIBRARIES})
+set(_combined_name)
+if (NOT _libdir)
+ if (WIN32)
+ set(_libdir ENV LIB)
+ elseif (APPLE)
+ set(_libdir ENV DYLD_LIBRARY_PATH)
+ else ()
+ set(_libdir ENV LD_LIBRARY_PATH)
+ endif ()
+endif ()
+
+list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+
+foreach(_library ${_list})
+ set(_combined_name ${_combined_name}_${_library})
+
+ if(_libraries_work)
+ if (BLA_STATIC)
+ if (WIN32)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ endif ()
+ if (APPLE)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ else ()
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
+ endif ()
+ else ()
+ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ # for ubuntu's libblas3gf and liblapack3gf packages
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf)
+ endif ()
+ endif ()
+ find_library(${_prefix}_${_library}_LIBRARY
+ NAMES ${_library}
+ PATHS ${_libdir}
+ )
+ mark_as_advanced(${_prefix}_${_library}_LIBRARY)
+ set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})
+ set(_libraries_work ${${_prefix}_${_library}_LIBRARY})
+ endif()
+endforeach()
+
+if(_libraries_work)
+ # Test this combination of libraries.
+ if(UNIX AND BLA_STATIC)
+ set(CMAKE_REQUIRED_LIBRARIES ${_flags} "-Wl,--start-group" ${${LIBRARIES}} ${_blas} "-Wl,--end-group" ${_threads})
+ else()
+ set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas} ${_threads})
+ endif()
+# message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}")
+ if (NOT CMAKE_Fortran_COMPILER_LOADED)
+ check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS)
+ else ()
+ check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS)
+ endif ()
+ set(CMAKE_REQUIRED_LIBRARIES)
+ set(_libraries_work ${${_prefix}${_combined_name}_WORKS})
+ #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}")
+endif()
+
+if(_libraries_work)
+ set(${LIBRARIES} ${${LIBRARIES}} ${_blas} ${_threads})
+else()
+ set(${LIBRARIES} FALSE)
+endif()
+
+endmacro()
+
+
+set(LAPACK_LINKER_FLAGS)
+set(LAPACK_LIBRARIES)
+set(LAPACK95_LIBRARIES)
+
+
+if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)
+ find_package(BLAS)
+else()
+ find_package(BLAS REQUIRED)
+endif()
+
+
+if(BLAS_FOUND)
+ set(LAPACK_LINKER_FLAGS ${BLAS_LINKER_FLAGS})
+ if (NOT $ENV{BLA_VENDOR} STREQUAL "")
+ set(BLA_VENDOR $ENV{BLA_VENDOR})
+ else ()
+ if(NOT BLA_VENDOR)
+ set(BLA_VENDOR "All")
+ endif()
+ endif ()
+
+#intel lapack
+if (BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All")
+ if (NOT WIN32)
+ set(LAPACK_mkl_LM "-lm")
+ set(LAPACK_mkl_LDL "-ldl")
+ endif ()
+ if (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)
+ if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)
+ find_PACKAGE(Threads)
+ else()
+ find_package(Threads REQUIRED)
+ endif()
+
+ if (BLA_VENDOR MATCHES "_64ilp")
+ set(LAPACK_mkl_ILP_MODE "ilp64")
+ else ()
+ set(LAPACK_mkl_ILP_MODE "lp64")
+ endif ()
+
+ set(LAPACK_SEARCH_LIBS "")
+
+ if (BLA_F95)
+ set(LAPACK_mkl_SEARCH_SYMBOL "cheev_f95")
+ set(_LIBRARIES LAPACK95_LIBRARIES)
+ set(_BLAS_LIBRARIES ${BLAS95_LIBRARIES})
+
+ # old
+ list(APPEND LAPACK_SEARCH_LIBS
+ "mkl_lapack95")
+ # new >= 10.3
+ list(APPEND LAPACK_SEARCH_LIBS
+ "mkl_intel_c")
+ list(APPEND LAPACK_SEARCH_LIBS
+ "mkl_lapack95_${LAPACK_mkl_ILP_MODE}")
+ else()
+ set(LAPACK_mkl_SEARCH_SYMBOL "cheev")
+ set(_LIBRARIES LAPACK_LIBRARIES)
+ set(_BLAS_LIBRARIES ${BLAS_LIBRARIES})
+
+ # old
+ list(APPEND LAPACK_SEARCH_LIBS
+ "mkl_lapack")
+ endif()
+
+ # First try empty lapack libs
+ if (NOT ${_LIBRARIES})
+ check_lapack_libraries(
+ ${_LIBRARIES}
+ LAPACK
+ ${LAPACK_mkl_SEARCH_SYMBOL}
+ ""
+ ""
+ "${_BLAS_LIBRARIES}"
+ ""
+ )
+ endif ()
+ # Then try the search libs
+ foreach (IT ${LAPACK_SEARCH_LIBS})
+ if (NOT ${_LIBRARIES})
+ check_lapack_libraries(
+ ${_LIBRARIES}
+ LAPACK
+ ${LAPACK_mkl_SEARCH_SYMBOL}
+ ""
+ "${IT}"
+ "${_BLAS_LIBRARIES}"
+ "${CMAKE_THREAD_LIBS_INIT};${LAPACK_mkl_LM};${LAPACK_mkl_LDL}"
+ )
+ endif ()
+ endforeach ()
+
+ unset(LAPACK_mkl_ILP_MODE)
+ unset(LAPACK_mkl_SEARCH_SYMBOL)
+ unset(LAPACK_mkl_LM)
+ unset(LAPACK_mkl_LDL)
+ endif ()
+endif()
+
+if (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All")
+ if(NOT LAPACK_LIBRARIES)
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "goto2"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All")
+ if(NOT LAPACK_LIBRARIES)
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "openblas"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif()
+endif ()
+
+if (BLA_VENDOR STREQUAL "FLAME" OR BLA_VENDOR STREQUAL "All")
+ if(NOT LAPACK_LIBRARIES)
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "flame"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif()
+endif ()
+
+#acml lapack
+if (BLA_VENDOR MATCHES "ACML" OR BLA_VENDOR STREQUAL "All")
+ if (BLAS_LIBRARIES MATCHES ".+acml.+")
+ set (LAPACK_LIBRARIES ${BLAS_LIBRARIES})
+ endif ()
+endif ()
+
+# Apple LAPACK library?
+if (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All")
+ if(NOT LAPACK_LIBRARIES)
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "Accelerate"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif()
+endif ()
+if (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All")
+ if ( NOT LAPACK_LIBRARIES )
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "vecLib"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif ()
+endif ()
+# Generic LAPACK library?
+if (BLA_VENDOR STREQUAL "Generic" OR
+ BLA_VENDOR STREQUAL "ATLAS" OR
+ BLA_VENDOR STREQUAL "All")
+ if ( NOT LAPACK_LIBRARIES )
+ check_lapack_libraries(
+ LAPACK_LIBRARIES
+ LAPACK
+ cheev
+ ""
+ "lapack"
+ "${BLAS_LIBRARIES}"
+ ""
+ )
+ endif ()
+endif ()
+
+else()
+ message(STATUS "LAPACK requires BLAS")
+endif()
+
+if(BLA_F95)
+ if(LAPACK95_LIBRARIES)
+ set(LAPACK95_FOUND TRUE)
+ else()
+ set(LAPACK95_FOUND FALSE)
+ endif()
+ if(NOT LAPACK_FIND_QUIETLY)
+ if(LAPACK95_FOUND)
+ message(STATUS "A library with LAPACK95 API found.")
+ else()
+ if(LAPACK_FIND_REQUIRED)
+ message(FATAL_ERROR
+ "A required library with LAPACK95 API not found. Please specify library location."
+ )
+ else()
+ message(STATUS
+ "A library with LAPACK95 API not found. Please specify library location."
+ )
+ endif()
+ endif()
+ endif()
+ set(LAPACK_FOUND "${LAPACK95_FOUND}")
+ set(LAPACK_LIBRARIES "${LAPACK95_LIBRARIES}")
+else()
+ if(LAPACK_LIBRARIES)
+ set(LAPACK_FOUND TRUE)
+ else()
+ set(LAPACK_FOUND FALSE)
+ endif()
+
+ if(NOT LAPACK_FIND_QUIETLY)
+ if(LAPACK_FOUND)
+ message(STATUS "A library with LAPACK API found.")
+ else()
+ if(LAPACK_FIND_REQUIRED)
+ message(FATAL_ERROR
+ "A required library with LAPACK API not found. Please specify library location."
+ )
+ else()
+ message(STATUS
+ "A library with LAPACK API not found. Please specify library location."
+ )
+ endif()
+ endif()
+ endif()
+endif()
+
+cmake_pop_check_state()
+set(CMAKE_FIND_LIBRARY_SUFFIXES ${_lapack_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
diff --git a/cmake/FindNvToolExt.cmake b/cmake/FindNvToolExt.cmake
new file mode 100644
index 00000000000..5f2998e442a
--- /dev/null
+++ b/cmake/FindNvToolExt.cmake
@@ -0,0 +1,35 @@
+# The following variables are optionally searched for defaults
+# NvToolExt_ROOT_DIR:
+#
+# The following are set after configuration is done:
+# NvToolExt_FOUND
+# NvToolExt_INCLUDE_DIR
+# NvToolExt_LIBRARIES
+# NvToolExt_LIBRARY_DIR
+# NvToolExt: a target
+
+include(FindPackageHandleStandardArgs)
+
+set(NvToolExt_SEARCH_DIRS ${CUDA_TOOLKIT_ROOT_DIR})
+if(WIN32)
+ list(APPEND NvToolExt_SEARCH_DIRS "C:/Program Files/NVIDIA Corporation/NvToolsExt")
+endif()
+set(NvToolExt_SEARCH_DIRS ${NvToolExt_ROOT_DIR} ${NvToolExt_SEARCH_DIRS})
+
+
+find_path(NvToolExt_INCLUDE_DIR nvToolsExt.h HINTS ${NvToolExt_SEARCH_DIRS} PATH_SUFFIXES include)
+
+# 32bit not considered
+set(NvToolExt_LIBNAME nvToolsExt libnvToolsExt.so libnvToolsExt.a libnvToolsExt.so nvToolsExt64_1.lib)
+find_library(NvToolExt_LIBRARIES NAMES ${NvToolExt_LIBNAME} HINTS ${NvToolExt_SEARCH_DIRS}
+ PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)
+
+find_package_handle_standard_args(NvToolExt REQUIRED_VARS NvToolExt_INCLUDE_DIR NvToolExt_LIBRARIES)
+
+add_library(NvToolExt INTERFACE)
+target_include_directories(NvToolExt INTERFACE ${NvToolExt_INCLUDE_DIR})
+# target_link_directories(NvToolExt INTERFACE ${NvToolExt_INCLUDE_DIR})
+target_link_libraries(NvToolExt INTERFACE ${NvToolExt_LIBRARIES})
+
+unset(NvToolExt_SEARCH_DIRS)
+unset(NvToolExt_LIBNAME)
diff --git a/cmake/INSTALL.md b/cmake/INSTALL.md
new file mode 100644
index 00000000000..0082212eb9b
--- /dev/null
+++ b/cmake/INSTALL.md
@@ -0,0 +1,49 @@
+# Install Instruction
+
+Execute following commands in the repo root.
+
+## Build with Old Style Make Generator
+```bash
+mkdir -p build && cd build
+cmake -DCMAKE_INSTALL_PREFIX=../dist .. # configure
+cmake --build . --target install -- -j8 # build && install, substitute -j8 with /m:8 if you are on Windows
+```
+
+## Build with Ninja Generator
+``` bash
+mkdir -p build && cd build
+cmake -GNinja -DCMAKE_INSTALL_PREFIX=../dist ..
+cmake --build . --target install
+```
+
+After the build, you can find all installed files in `<kaldi_root>/dist`.
+
+# For Advanced Configuration
+
+The following options are currently available:
+
+| Variable | Available Options | Default |
+| ---------------------- | ------------------------- | -------- |
+| MATHLIB | OpenBLAS, MKL, Accelerate | OpenBLAS |
+| KALDI_BUILD_EXE | ON,OFF | ON |
+| KALDI_BUILD_TEST | ON,OFF | ON |
+| KALDI_USE_PATCH_NUMBER | ON,OFF | OFF |
+| BUILD_SHARED_LIBS | ON,OFF | OFF |
+
+Append `-D<variable>=<value>` to the configure command to use it, e.g.,
+`-DKALDI_BUILD_TEST=OFF` will disable building of test executables. For more
+information, please refer to
+[CMake Documentation](https://cmake.org/cmake/help/latest/manual/cmake.1.html).
+For quickly learning CMake usage, LLVM's short introduction will do the trick:
+[Basic CMake usage](https://llvm.org/docs/CMake.html#usage),
+[Options and variables](https://llvm.org/docs/CMake.html#options-and-variables),
+[Frequently-used CMake variables](https://llvm.org/docs/CMake.html#frequently-used-cmake-variables).
+
+NOTE 1: Currently, BUILD_SHARED_LIBS does not work on Windows because some symbols
+        (variables) are not properly exported.
+
+NOTE 2: For scripts users, since you are doing an out of source build, and the
+ install destination is at your disposal, the `$PATH` is not configured
+ properly in this case. Scripts will not work out of box. See how `$PATH`
+ is modified in [path.sh](../egs/wsj/s5/path.sh). You should add
+        `<installation_dir>/bin` to your `$PATH` before running any scripts.
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
new file mode 100644
index 00000000000..c7f45827a99
--- /dev/null
+++ b/cmake/Utils.cmake
@@ -0,0 +1,50 @@
+if(NOT CMAKE_VERSION VERSION_LESS "3.10")
+ include_guard()
+endif()
+
+# For Windows, some environment variables use backslashes in paths; converting
+# them to forward slashes fixes some nasty problems in CMake.
+macro(normalize_path in_path)
+ file(TO_CMAKE_PATH "${${in_path}}" normalize_path_out_path)
+ set(${in_path} "${normalize_path_out_path}")
+ unset(normalize_path_out_path)
+endmacro()
+
+macro(normalize_env_path in_path)
+ file(TO_CMAKE_PATH "$${in_path}" normalize_env_path_out_path)
+ set(${in_path} "${normalize_env_path_out_path}")
+ unset(normalize_env_path_out_path)
+endmacro()
+
+
+macro(add_kaldi_executable)
+ if(${KALDI_BUILD_EXE})
+ cmake_parse_arguments(kaldi_exe "" "NAME" "SOURCES;DEPENDS" ${ARGN})
+ add_executable(${kaldi_exe_NAME} ${kaldi_exe_SOURCES})
+ target_link_libraries(${kaldi_exe_NAME} PRIVATE ${kaldi_exe_DEPENDS})
+ # list(APPEND KALDI_EXECUTABLES ${kaldi_exe_NAME})
+ install(TARGETS ${kaldi_exe_NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+ unset(kaldi_exe_NAME)
+ unset(kaldi_exe_SOURCES)
+ unset(kaldi_exe_DEPENDS)
+ endif()
+endmacro()
+
+macro(add_kaldi_test_executable)
+ if(${KALDI_BUILD_TEST})
+ cmake_parse_arguments(kaldi_test_exe "" "NAME" "SOURCES;DEPENDS" ${ARGN})
+ add_executable(${kaldi_test_exe_NAME} ${kaldi_test_exe_SOURCES})
+ target_link_libraries(${kaldi_test_exe_NAME} PRIVATE ${kaldi_test_exe_DEPENDS})
+ add_test(
+ NAME ${kaldi_test_exe_NAME}
+ COMMAND ${kaldi_test_exe_NAME}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
+ # list(APPEND KALDI_TEST_EXECUTABLES ${kaldi_test_exe_NAME})
+ install(TARGETS ${kaldi_test_exe_NAME} RUNTIME DESTINATION testbin)
+
+ unset(kaldi_test_exe_NAME)
+ unset(kaldi_test_exe_SOURCES)
+ unset(kaldi_test_exe_DEPENDS)
+ endif()
+endmacro()
diff --git a/cmake/VersionHelper.cmake b/cmake/VersionHelper.cmake
new file mode 100644
index 00000000000..eb8c6acef23
--- /dev/null
+++ b/cmake/VersionHelper.cmake
@@ -0,0 +1,15 @@
+function(get_version)
+ file(READ ${CMAKE_CURRENT_SOURCE_DIR}/src/.version version)
+ string(STRIP ${version} version)
+ execute_process(COMMAND git log -n1 --format=%H src/.version
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ OUTPUT_VARIABLE version_commit
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+ execute_process(COMMAND git rev-list --count "${version_commit}..HEAD"
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ OUTPUT_VARIABLE patch_number)
+ string(STRIP ${patch_number} patch_number)
+
+ set(KALDI_VERSION ${version} PARENT_SCOPE)
+ set(KALDI_PATCH_NUMBER ${patch_number} PARENT_SCOPE)
+endfunction()
diff --git a/cmake/gen_cmake_skeleton.py b/cmake/gen_cmake_skeleton.py
new file mode 100644
index 00000000000..6492f36f7c8
--- /dev/null
+++ b/cmake/gen_cmake_skeleton.py
@@ -0,0 +1,357 @@
+import os
+import sys
+import re
+import fnmatch
+import argparse
+
+# parse early; `args` is referenced globally below
+parser = argparse.ArgumentParser()
+parser.add_argument("working_dir")
+parser.add_argument("--quiet", default=False, action="store_true")
+args = parser.parse_args()
+
+def print_wrapper(*args_, **kwargs):
+ if not args.quiet:
+ print(*args_, **kwargs)
+
+def get_subdirectories(d):
+ return [name for name in os.listdir(d) if os.path.isdir(os.path.join(d, name))]
+
+def is_bin_dir(d):
+ return d.endswith("bin")
+
+def get_files(d):
+ return [name for name in os.listdir(d) if os.path.isfile(os.path.join(d, name))]
+
+def is_header(f):
+ return f.endswith(".h")
+
+def is_cu_source(f):
+ return f.endswith(".cu")
+
+def is_test_source(f):
+ return f.endswith("-test.cc")
+
+def is_source(f):
+ return f.endswith(".cc") and not is_test_source(f)
+
+def lib_dir_name_to_lib_target(dir_name):
+ return "kaldi-" + dir_name
+
+def bin_dir_name_to_lib_target(dir_name):
+ """return the primary lib target for all executable targets in this bin dir"""
+ assert is_bin_dir(dir_name)
+ if dir_name == "bin":
+ # NOTE: "kaldi-util" might be a more strict primary lib target...
+ return "kaldi-hmm"
+ elif dir_name == "fstbin":
+ return "kaldi-fstext"
+ else:
+ return "kaldi-" + dir_name[:-3]
+
+def wrap_notwin32_condition(should_wrap, lines):
+ if isinstance(lines, str):
+ lines = [lines]
+ if should_wrap:
+ return ["if(NOT WIN32)"] + list(map(lambda l: " " + l, lines)) + ["endif()"]
+ else:
+ return lines
+
+
+def get_exe_additional_depends(t):
+ additional = {
+ # solve bin
+ "align-*": ["decoder"],
+ "compile-*graph*": ["decoder"],
+ "decode-faster": ["decoder"],
+ "latgen-faster-mapped": ["decoder"],
+ "latgen-faster-mapped-parallel": ["decoder"],
+ "latgen-incremental-mapped": ["decoder"],
+ "decode-faster-mapped": ["decoder"],
+ "sum-lda-accs": ["transform"],
+ "sum-mllt-accs": ["transform"],
+ "est-mllt": ["transform"],
+ "est-lda": ["transform"],
+ "acc-lda": ["transform"],
+ "build-pfile-from-ali": ["gmm"],
+ "make-*-transducer": ["fstext"],
+ "phones-to-prons": ["fstext"],
+
+ # solve gmmbin
+ "post-to-feats" : ["hmm"],
+ "append-post-to-feats" : ["hmm"],
+ "gmm-*": ["hmm", "transform"],
+ "gmm-latgen-*": ["decoder"],
+ "gmm-decode-*": ["decoder"],
+ "gmm-align": ["decoder"],
+ "gmm-align-compiled": ["decoder"],
+ "gmm-est-fmllr-gpost": ["sgmm2", "hmm"],
+ "gmm-rescore-lattice": ["hmm", "lat"],
+
+ # solve fstbin
+ "make-grammar-fst": ["decoder"],
+
+ # solve sgmm2bin
+ "sgmm2-*": ["hmm"],
+ "sgmm2-latgen-faster*": ["decoder"],
+ "sgmm2-align-compiled": ["decoder"],
+ "sgmm2-rescore-lattice": ["lat"],
+ "init-ubm": ["hmm"],
+
+ # solve nnetbin
+ "nnet-train-mmi-sequential": ["lat"],
+ "nnet-train-mpe-sequential": ["lat"],
+
+ # solve nnet2bin
+ "nnet-latgen-faster*": ["fstext", "decoder"],
+ "nnet-align-compiled": ["decoder"],
+ "nnet1-to-raw-nnet": ["nnet"],
+
+ # solve chainbin
+ "nnet3-chain-*": ["nnet3"],
+
+ # solve latbin
+ "lattice-compose": ["fstext"],
+ "lattice-lmrescore": ["fstext"],
+ "lattice-lmrescore-*": ["fstext", "rnnlm"],
+
+ # solve ivectorbin
+ "ivector-extract*": ["hmm"],
+
+ # solve kwsbin
+ "generate-proxy-keywords": ["fstext"],
+ "transcripts-to-fsts": ["fstext"],
+ }
+ l = []
+ for pattern in additional.keys():
+ if fnmatch.fnmatch(t, pattern):
+ l.extend(list(map(lambda name: lib_dir_name_to_lib_target(name), additional[pattern])))
+ return sorted(list(set(l)))
+
+def disable_for_win32(t):
+ disabled = [
+ "online-audio-client",
+ "online-net-client",
+ "online2-tcp-nnet3-decode-faster",
+ "online-server-gmm-decode-faster",
+ "online-audio-server-decode-faster"
+ ]
+ return t in disabled
+
+class CMakeListsHeaderLibrary(object):
+ def __init__(self, dir_name):
+ self.dir_name = dir_name
+ self.target_name = lib_dir_name_to_lib_target(self.dir_name)
+ self.header_list = []
+
+ def add_header(self, filename):
+ self.header_list.append(filename)
+
+ def add_source(self, filename):
+ pass
+
+ def add_cuda_source(self, filename):
+ pass
+
+ def add_test_source(self, filename):
+ pass
+
+ def gen_code(self):
+ ret = []
+ if len(self.header_list) > 0:
+ ret.append("set(PUBLIC_HEADERS")
+ for f in self.header_list:
+ ret.append(" " + f)
+ ret.append(")\n")
+
+ ret.append("add_library(" + self.target_name + " INTERFACE)")
+ ret.append("target_include_directories(" + self.target_name + " INTERFACE ")
+ ret.append(" $")
+ ret.append(" $")
+ ret.append(")\n")
+
+ ret.append("""
+install(TARGETS {tgt} EXPORT kaldi-targets)
+
+install(FILES ${{PUBLIC_HEADERS}} DESTINATION include/kaldi/{dir})
+""".format(tgt=self.target_name, dir=self.dir_name))
+
+ return "\n".join(ret)
+
+class CMakeListsLibrary(object):
+
+ def __init__(self, dir_name):
+ self.dir_name = dir_name
+ self.target_name = lib_dir_name_to_lib_target(self.dir_name)
+ self.header_list = []
+ self.source_list = []
+ self.cuda_source_list = []
+ self.test_source_list = []
+ self.depends = []
+
+ def add_header(self, filename):
+ self.header_list.append(filename)
+
+ def add_source(self, filename):
+ self.source_list.append(filename)
+
+ def add_cuda_source(self, filename):
+ self.cuda_source_list.append(filename)
+
+ def add_test_source(self, filename):
+ self.test_source_list.append(filename)
+
+ def load_dependency_from_makefile(self, filename):
+ with open(filename) as f:
+ makefile = f.read()
+ if "ADDLIBS" not in makefile:
+ print_wrapper("WARNING: non-standard", filename)
+ return
+ libs = makefile.split("ADDLIBS")[-1].split("\n\n")[0]
+ libs = re.findall("[^\s\\\\=]+", libs)
+ for l in libs:
+ self.depends.append(os.path.splitext(os.path.basename(l))[0])
+
+ def gen_code(self):
+ ret = []
+
+ if len(self.header_list) > 0:
+ ret.append("set(PUBLIC_HEADERS")
+ for f in self.header_list:
+ ret.append(" " + f)
+ ret.append(")\n")
+
+ if len(self.cuda_source_list) > 0:
+ self.source_list.append("${CUDA_OBJS}")
+ ret.append("if(CUDA_FOUND)")
+ ret.append(" cuda_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)")
+ ret.append(" cuda_compile(CUDA_OBJS")
+ for f in self.cuda_source_list:
+ ret.append(" " + f)
+ ret.append(" )")
+ ret.append("endif()\n")
+
+ ret.append("add_library(" + self.target_name)
+ for f in self.source_list:
+ ret.append(" " + f)
+ ret.append(")\n")
+ ret.append("target_include_directories(" + self.target_name + " PUBLIC ")
+ ret.append(" $")
+ ret.append(" $")
+ ret.append(")\n")
+
+ if len(self.depends) > 0:
+ ret.append("target_link_libraries(" + self.target_name + " PUBLIC")
+ for d in self.depends:
+ ret.append(" " + d)
+ ret.append(")\n")
+
+ def get_test_exe_name(filename):
+ exe_name = os.path.splitext(f)[0]
+ if self.dir_name.startswith("nnet") and exe_name.startswith("nnet"):
+ return self.dir_name + "-" + exe_name.split("-", 1)[1]
+ else:
+ return exe_name
+
+ if len(self.test_source_list) > 0:
+ ret.append("if(KALDI_BUILD_TEST)")
+ for f in self.test_source_list:
+ exe_target = get_test_exe_name(f)
+ depends = (self.target_name + " " + " ".join(get_exe_additional_depends(exe_target))).strip()
+ ret.extend(wrap_notwin32_condition(disable_for_win32(self.target_name),
+ " add_kaldi_test_executable(NAME " + exe_target + " SOURCES " + f + " DEPENDS " + depends + ")"))
+ ret.append("endif()")
+
+ ret.append("""
+install(TARGETS {tgt}
+ EXPORT kaldi-targets
+ ARCHIVE DESTINATION ${{CMAKE_INSTALL_LIBDIR}}
+ LIBRARY DESTINATION ${{CMAKE_INSTALL_LIBDIR}}
+ RUNTIME DESTINATION ${{CMAKE_INSTALL_BINDIR}}
+)
+
+install(FILES ${{PUBLIC_HEADERS}} DESTINATION include/kaldi/{dir})
+""".format(tgt=self.target_name, dir=self.dir_name))
+
+ return "\n".join(ret)
+
+
+
+class CMakeListsExecutable(object):
+
+ def __init__(self, dir_name, filename):
+ assert(dir_name.endswith("bin"))
+ self.list = []
+ exe_name = os.path.splitext(os.path.basename(filename))[0]
+ file_name = filename
+ depend = bin_dir_name_to_lib_target(dir_name)
+ self.list.append((exe_name, file_name, depend))
+
+ def gen_code(self):
+ ret = []
+ for exe_name, file_name, depend in self.list:
+ depends = (depend + " " + " ".join(get_exe_additional_depends(exe_name))).strip()
+ ret.extend(wrap_notwin32_condition(disable_for_win32(exe_name),
+ "add_kaldi_executable(NAME " + exe_name + " SOURCES " + file_name + " DEPENDS " + depends + ")"))
+
+ return "\n".join(ret)
+
+class CMakeListsFile(object):
+
+ GEN_CMAKE_HEADER = "# generated with cmake/gen_cmake_skeleton.py, DO NOT MODIFY.\n"
+
+ def __init__(self, directory):
+ self.path = os.path.realpath(os.path.join(directory, "CMakeLists.txt"))
+ self.sections = []
+
+ def add_section(self, section):
+ self.sections.append(section)
+
+ def write_file(self):
+ with open(self.path, "w", newline='\n') as f: # good luck for python2
+ f.write(CMakeListsFile.GEN_CMAKE_HEADER)
+ for s in self.sections:
+ code = s.gen_code()
+ f.write(code)
+ f.write("\n")
+ print_wrapper(" Writed", self.path)
+
+
+if __name__ == "__main__":
+ os.chdir(args.working_dir)
+ print_wrapper("Working in ", args.working_dir)
+
+ subdirs = get_subdirectories(".")
+ for d in subdirs:
+ if d.startswith('tfrnnlm'):
+ continue
+ cmakelists = CMakeListsFile(d)
+ if is_bin_dir(d):
+ for f in get_files(d):
+ if is_source(f):
+ dir_name = os.path.basename(d)
+ filename = os.path.basename(f)
+ exe = CMakeListsExecutable(dir_name, filename)
+ cmakelists.add_section(exe)
+ else:
+ dir_name = os.path.basename(d)
+ lib = None
+ makefile = os.path.join(d, "Makefile")
+ if not os.path.exists(makefile):
+ lib = CMakeListsHeaderLibrary(dir_name)
+ else:
+ lib = CMakeListsLibrary(dir_name)
+ lib.load_dependency_from_makefile(makefile)
+ cmakelists.add_section(lib)
+ for f in sorted(get_files(d)):
+ filename = os.path.basename(f)
+ if is_source(filename):
+ lib.add_source(filename)
+ elif is_cu_source(filename):
+ lib.add_cuda_source(filename)
+ elif is_test_source(filename):
+ lib.add_test_source(filename)
+ elif is_header(filename):
+ lib.add_header(filename)
+
+ cmakelists.write_file()
diff --git a/cmake/kaldi-config.cmake.in b/cmake/kaldi-config.cmake.in
new file mode 100644
index 00000000000..123f58c5699
--- /dev/null
+++ b/cmake/kaldi-config.cmake.in
@@ -0,0 +1,7 @@
+@PACKAGE_INIT@
+
+find_package(Threads)
+
+if(NOT TARGET kaldi-base)
+ include(${CMAKE_CURRENT_LIST_DIR}/kaldi-targets.cmake)
+endif()
diff --git a/cmake/third_party/get_third_party.cmake b/cmake/third_party/get_third_party.cmake
new file mode 100644
index 00000000000..8e24dc9f643
--- /dev/null
+++ b/cmake/third_party/get_third_party.cmake
@@ -0,0 +1,20 @@
+# Download and unpack a third-party library at configure time
+# The original code is at the README of google-test:
+# https://github.com/google/googletest/tree/master/googletest
+function(get_third_party name)
+ configure_file(
+ "${PROJECT_SOURCE_DIR}/cmake/third_party/${name}.cmake"
+ "${CMAKE_CURRENT_BINARY_DIR}/${name}-download/CMakeLists.txt")
+ execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
+ RESULT_VARIABLE result
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${name}-download")
+ if(result)
+ message(FATAL_ERROR "CMake step for ${name} failed: ${result}")
+ endif()
+ execute_process(COMMAND ${CMAKE_COMMAND} --build .
+ RESULT_VARIABLE result
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${name}-download")
+ if(result)
+ message(FATAL_ERROR "Build step for ${name} failed: ${result}")
+ endif()
+endfunction()
diff --git a/cmake/third_party/openfst.cmake b/cmake/third_party/openfst.cmake
new file mode 100644
index 00000000000..19a7f527f8f
--- /dev/null
+++ b/cmake/third_party/openfst.cmake
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 2.8.2)
+project(openfst-download NONE)
+
+include(ExternalProject)
+ExternalProject_Add(openfst
+ GIT_REPOSITORY https://github.com/kkm000/openfst
+ GIT_TAG 0bca6e76d24647427356dc242b0adbf3b5f1a8d9 # tag win/1.7.2.1
+ SOURCE_DIR "${CMAKE_BINARY_DIR}/openfst"
+ BINARY_DIR ""
+ CONFIGURE_COMMAND ""
+ BUILD_COMMAND ""
+ INSTALL_COMMAND ""
+ TEST_COMMAND ""
+)
diff --git a/cmake/third_party/openfst_lib_target.cmake b/cmake/third_party/openfst_lib_target.cmake
new file mode 100644
index 00000000000..dde5efc402a
--- /dev/null
+++ b/cmake/third_party/openfst_lib_target.cmake
@@ -0,0 +1,31 @@
+if(NOT OPENFST_ROOT_DIR)
+ message(FATAL_ERROR)
+endif()
+
+set(fst_source_dir ${OPENFST_ROOT_DIR}/src/lib)
+set(fst_include_dir ${OPENFST_ROOT_DIR}/src/include)
+
+include_directories(${fst_include_dir})
+file(GLOB fst_sources "${fst_source_dir}/*.cc")
+
+add_library(fst ${fst_sources})
+target_include_directories(fst PUBLIC
+     $<BUILD_INTERFACE:${fst_include_dir}>
+     $<INSTALL_INTERFACE:include/openfst>
+)
+
+install(TARGETS fst
+ EXPORT kaldi-targets
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+)
+
+install(DIRECTORY ${fst_include_dir}/fst
+ DESTINATION include/openfst
+ PATTERN "test/*.h" EXCLUDE
+)
+
+unset(fst_source_dir)
+unset(fst_include_dir)
+unset(fst_sources)
diff --git a/egs/aidatatang_200zh/s5/local/chain/compare_wer.sh b/egs/aidatatang_200zh/s5/local/chain/compare_wer.sh
index 71e6fbe106d..c365a8ab780 100755
--- a/egs/aidatatang_200zh/s5/local/chain/compare_wer.sh
+++ b/egs/aidatatang_200zh/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen Liu)
# compare wer between diff. models in aidatatang_200zh chain directory
diff --git a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh
index 0be0e2c79c6..9af9622d301 100644
--- a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7h.sh in swbd chain recipe.
diff --git a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh
index 78dd4000e58..0aead9a7103 100644
--- a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh
+++ b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_1a.sh.
# This setup used online pitch to train the neural network.
diff --git a/egs/aidatatang_200zh/s5/local/data_prep.sh b/egs/aidatatang_200zh/s5/local/data_prep.sh
index bb278a7d904..1e4bf127b28 100644
--- a/egs/aidatatang_200zh/s5/local/data_prep.sh
+++ b/egs/aidatatang_200zh/s5/local/data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/aidatatang_200zh/s5/local/download_and_untar.sh b/egs/aidatatang_200zh/s5/local/download_and_untar.sh
index 39f9ac01ff7..1056ead6d1a 100644
--- a/egs/aidatatang_200zh/s5/local/download_and_untar.sh
+++ b/egs/aidatatang_200zh/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/aidatatang_200zh/s5/local/format_data.sh b/egs/aidatatang_200zh/s5/local/format_data.sh
index 47af9dd9dfd..2198bae3fe7 100644
--- a/egs/aidatatang_200zh/s5/local/format_data.sh
+++ b/egs/aidatatang_200zh/s5/local/format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
. ./path.sh
diff --git a/egs/aidatatang_200zh/s5/local/nnet3/compare_wer.sh b/egs/aidatatang_200zh/s5/local/nnet3/compare_wer.sh
index 2d85626c356..35c1330aab4 100755
--- a/egs/aidatatang_200zh/s5/local/nnet3/compare_wer.sh
+++ b/egs/aidatatang_200zh/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen Liu)
# compare wer between diff. models in aidatatang_200zh nnet3 directory
diff --git a/egs/aidatatang_200zh/s5/local/nnet3/run_ivector_common.sh b/egs/aidatatang_200zh/s5/local/nnet3/run_ivector_common.sh
index 0fe55ecf000..f3ed8623495 100644
--- a/egs/aidatatang_200zh/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/aidatatang_200zh/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/aidatatang_200zh/s5/local/nnet3/tuning/run_tdnn.sh b/egs/aidatatang_200zh/s5/local/nnet3/tuning/run_tdnn.sh
index 2bcded42ed1..ca396e50542 100644
--- a/egs/aidatatang_200zh/s5/local/nnet3/tuning/run_tdnn.sh
+++ b/egs/aidatatang_200zh/s5/local/nnet3/tuning/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
diff --git a/egs/aidatatang_200zh/s5/local/prepare_dict.sh b/egs/aidatatang_200zh/s5/local/prepare_dict.sh
index aa72bcd48d2..8096c45be34 100644
--- a/egs/aidatatang_200zh/s5/local/prepare_dict.sh
+++ b/egs/aidatatang_200zh/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#Copyright 2016 LeSpeech (Author: Xingyu Na)
# prepare dictionary for aidatatang
diff --git a/egs/aidatatang_200zh/s5/local/score.sh b/egs/aidatatang_200zh/s5/local/score.sh
index a9786169973..d283ceb68dc 100644
--- a/egs/aidatatang_200zh/s5/local/score.sh
+++ b/egs/aidatatang_200zh/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/aidatatang_200zh/s5/local/train_lms.sh b/egs/aidatatang_200zh/s5/local/train_lms.sh
index bc52f8acb20..96da93d3e9f 100644
--- a/egs/aidatatang_200zh/s5/local/train_lms.sh
+++ b/egs/aidatatang_200zh/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/aidatatang_200zh/s5/run.sh b/egs/aidatatang_200zh/s5/run.sh
index 47e46a660cd..3bd20469006 100644
--- a/egs/aidatatang_200zh/s5/run.sh
+++ b/egs/aidatatang_200zh/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Beijing DataTang Tech. Co. Ltd. (Author: Liyuan Wang)
# 2017 Hui Bu
diff --git a/egs/aishell/s5/local/aishell_data_prep.sh b/egs/aishell/s5/local/aishell_data_prep.sh
index 4747e4f4d82..3be62708db2 100755
--- a/egs/aishell/s5/local/aishell_data_prep.sh
+++ b/egs/aishell/s5/local/aishell_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/aishell/s5/local/aishell_prepare_dict.sh b/egs/aishell/s5/local/aishell_prepare_dict.sh
index c4cabb24de4..28ab5e2122f 100755
--- a/egs/aishell/s5/local/aishell_prepare_dict.sh
+++ b/egs/aishell/s5/local/aishell_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/aishell/s5/local/aishell_train_lms.sh b/egs/aishell/s5/local/aishell_train_lms.sh
index 9b6cdad2960..eaca5e2fafa 100755
--- a/egs/aishell/s5/local/aishell_train_lms.sh
+++ b/egs/aishell/s5/local/aishell_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh
index b38fa4d9c7a..79b2023ab7e 100755
--- a/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7h.sh in swbd chain recipe.
diff --git a/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh
index 6b7223785d9..669a014e8cf 100755
--- a/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh
+++ b/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_1a.sh.
# This setup used online pitch to train the neural network.
diff --git a/egs/aishell/s5/local/download_and_untar.sh b/egs/aishell/s5/local/download_and_untar.sh
index 58a278241d7..9c70836bf46 100755
--- a/egs/aishell/s5/local/download_and_untar.sh
+++ b/egs/aishell/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/aishell/s5/local/nnet3/run_ivector_common.sh b/egs/aishell/s5/local/nnet3/run_ivector_common.sh
index af0ae122372..8f73deb145b 100755
--- a/egs/aishell/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/aishell/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh b/egs/aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
index 3cb8cd861a3..db434b2b24b 100755
--- a/egs/aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on swbd/s5c/local/nnet3/run_tdnn.sh
diff --git a/egs/aishell/s5/local/nnet3/tuning/run_tdnn_2a.sh b/egs/aishell/s5/local/nnet3/tuning/run_tdnn_2a.sh
index 603149585f2..a5b129be31c 100755
--- a/egs/aishell/s5/local/nnet3/tuning/run_tdnn_2a.sh
+++ b/egs/aishell/s5/local/nnet3/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on aishell/s5/local/nnet3/tuning/run_tdnn_1a.sh
diff --git a/egs/aishell/s5/local/score.sh b/egs/aishell/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/aishell/s5/local/score.sh
+++ b/egs/aishell/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/aishell/s5/run.sh b/egs/aishell/s5/run.sh
index a99cb51c656..66c85a3f82d 100755
--- a/egs/aishell/s5/run.sh
+++ b/egs/aishell/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Beijing Shell Shell Tech. Co. Ltd. (Authors: Hui Bu)
# 2017 Jiayu Du
@@ -141,5 +141,6 @@ local/chain/run_tdnn.sh
# getting results (see RESULTS file)
for x in exp/*/decode_test; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done 2>/dev/null
+for x in exp/*/*/decode_test; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done 2>/dev/null
exit 0;
diff --git a/egs/aishell/v1/local/aishell_data_prep.sh b/egs/aishell/v1/local/aishell_data_prep.sh
index 11d131dcdb1..022276cf2b6 100755
--- a/egs/aishell/v1/local/aishell_data_prep.sh
+++ b/egs/aishell/v1/local/aishell_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/aishell/v1/local/download_and_untar.sh b/egs/aishell/v1/local/download_and_untar.sh
index 3578a1c0835..b0636a8cd86 100755
--- a/egs/aishell/v1/local/download_and_untar.sh
+++ b/egs/aishell/v1/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/aishell/v1/run.sh b/egs/aishell/v1/run.sh
index 0aaa6d493d6..b16939bd37a 100755
--- a/egs/aishell/v1/run.sh
+++ b/egs/aishell/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Beijing Shell Shell Tech. Co. Ltd. (Authors: Hui Bu)
# 2017 Jiayu Du
# 2017 Chao Li
diff --git a/egs/aishell2/s5/local/chain/compare_wer.sh b/egs/aishell2/s5/local/chain/compare_wer.sh
index c66a861c3f3..e5730df9848 100755
--- a/egs/aishell2/s5/local/chain/compare_wer.sh
+++ b/egs/aishell2/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen LIU)
# Apache 2.0
diff --git a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh
index 86c9becac5b..c1cc56ea3c6 100755
--- a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the original baseline scripts, which is supposed to be deprecated.
diff --git a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh
index d8560e63909..f1bfaf8d373 100755
--- a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# _1b is as _1a, but with pitch feats, i-vector and dropout schedule added, referenced from wsj
diff --git a/egs/aishell2/s5/local/nnet3/compare_wer.sh b/egs/aishell2/s5/local/nnet3/compare_wer.sh
index 84dda2fda14..66c1f640704 100755
--- a/egs/aishell2/s5/local/nnet3/compare_wer.sh
+++ b/egs/aishell2/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen LIU)
# Apache 2.0
diff --git a/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1a.sh b/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1a.sh
index 34ca1f0f224..130aee3cb5e 100755
--- a/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on swbd/s5c/local/nnet3/run_tdnn.sh
diff --git a/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1b.sh b/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1b.sh
index ea3a59e90ee..a6fa46f1444 100755
--- a/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1b.sh
+++ b/egs/aishell2/s5/local/nnet3/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_1a.sh, but with pitch features applied
diff --git a/egs/aishell2/s5/local/prepare_all.sh b/egs/aishell2/s5/local/prepare_all.sh
index 3928eb95ca3..b9b9bb271ec 100755
--- a/egs/aishell2/s5/local/prepare_all.sh
+++ b/egs/aishell2/s5/local/prepare_all.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
diff --git a/egs/aishell2/s5/local/prepare_data.sh b/egs/aishell2/s5/local/prepare_data.sh
index 4be9664ac31..6e0538155bb 100755
--- a/egs/aishell2/s5/local/prepare_data.sh
+++ b/egs/aishell2/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
# Apache 2.0
diff --git a/egs/aishell2/s5/local/prepare_dict.sh b/egs/aishell2/s5/local/prepare_dict.sh
index 56ab885ae94..9df3d73f972 100755
--- a/egs/aishell2/s5/local/prepare_dict.sh
+++ b/egs/aishell2/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
# Apache 2.0
diff --git a/egs/aishell2/s5/local/run_gmm.sh b/egs/aishell2/s5/local/run_gmm.sh
index 569e5ab570a..f32dde55348 100755
--- a/egs/aishell2/s5/local/run_gmm.sh
+++ b/egs/aishell2/s5/local/run_gmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
# 2018 Emotech LTD (Author: Xuechen LIU)
diff --git a/egs/aishell2/s5/local/score.sh b/egs/aishell2/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/aishell2/s5/local/score.sh
+++ b/egs/aishell2/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/aishell2/s5/local/train_lms.sh b/egs/aishell2/s5/local/train_lms.sh
index 179a7b78e14..0efeb2d2fd4 100755
--- a/egs/aishell2/s5/local/train_lms.sh
+++ b/egs/aishell2/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
# Apache 2.0
diff --git a/egs/aishell2/s5/run.sh b/egs/aishell2/s5/run.sh
index 8afdd3ed310..ffa4268081b 100755
--- a/egs/aishell2/s5/run.sh
+++ b/egs/aishell2/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
diff --git a/egs/ami/s5/local/ami_beamform.sh b/egs/ami/s5/local/ami_beamform.sh
index b5ff8c23ba8..dd4cf22ac7b 100755
--- a/egs/ami/s5/local/ami_beamform.sh
+++ b/egs/ami/s5/local/ami_beamform.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# Apache 2.0
diff --git a/egs/ami/s5/local/ami_download.sh b/egs/ami/s5/local/ami_download.sh
index cba130c8467..8c48a16ae7a 100755
--- a/egs/ami/s5/local/ami_download.sh
+++ b/egs/ami/s5/local/ami_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski, Jonathan Kilgour)
# Copyright 2015, Brno University of Technology (Author: Karel Vesely)
@@ -56,7 +56,7 @@ wgetfile=$wdir/wget_$mic.sh
manifest="wget --continue -O $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-0153-Tue-Oct-2-2018.manifest.txt"
-echo "#!/bin/bash" > $wgetfile
+echo "#!/usr/bin/env bash" > $wgetfile
echo $manifest >> $wgetfile
while read line; do
diff --git a/egs/ami/s5/local/ami_format_data.sh b/egs/ami/s5/local/ami_format_data.sh
index b69583850ab..c8c5ff0946d 100755
--- a/egs/ami/s5/local/ami_format_data.sh
+++ b/egs/ami/s5/local/ami_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/ami/s5/local/ami_ihm_data_prep.sh b/egs/ami/s5/local/ami_ihm_data_prep.sh
index 16949aef9b8..1a31ee0c233 100755
--- a/egs/ami/s5/local/ami_ihm_data_prep.sh
+++ b/egs/ami/s5/local/ami_ihm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus training data preparation
diff --git a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh
index 7112e0259a0..b8f9614c907 100755
--- a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh
+++ b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_mdm_data_prep.sh b/egs/ami/s5/local/ami_mdm_data_prep.sh
index 22cebd1ea11..427bf49cd0b 100755
--- a/egs/ami/s5/local/ami_mdm_data_prep.sh
+++ b/egs/ami/s5/local/ami_mdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh
index 9c4b55308f2..c05e80169c2 100755
--- a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh
+++ b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_prepare_dict.sh b/egs/ami/s5/local/ami_prepare_dict.sh
index 1834cfd112b..26f75e83e1d 100755
--- a/egs/ami/s5/local/ami_prepare_dict.sh
+++ b/egs/ami/s5/local/ami_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#adapted from fisher dict preparation script, Author: Pawel Swietojanski
diff --git a/egs/ami/s5/local/ami_sdm_data_prep.sh b/egs/ami/s5/local/ami_sdm_data_prep.sh
index ea92055e089..055dd61aaa2 100755
--- a/egs/ami/s5/local/ami_sdm_data_prep.sh
+++ b/egs/ami/s5/local/ami_sdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh
index 815e1b2d270..ec6b7933df7 100755
--- a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh
+++ b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# AMI Corpus dev/eval data preparation
diff --git a/egs/ami/s5/local/ami_text_prep.sh b/egs/ami/s5/local/ami_text_prep.sh
index 777c3d8b086..eace6dfc1c7 100755
--- a/egs/ami/s5/local/ami_text_prep.sh
+++ b/egs/ami/s5/local/ami_text_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Brno University of Technology (Author: Karel Vesely)
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski), 2014, Apache 2.0
diff --git a/egs/ami/s5/local/ami_train_lms.sh b/egs/ami/s5/local/ami_train_lms.sh
index 493a3edb5da..54ad87880a4 100755
--- a/egs/ami/s5/local/ami_train_lms.sh
+++ b/egs/ami/s5/local/ami_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal, Pawel Swietojanski
diff --git a/egs/ami/s5/local/ami_xml2text.sh b/egs/ami/s5/local/ami_xml2text.sh
index c4b90a33702..6ccf28c12b8 100755
--- a/egs/ami/s5/local/ami_xml2text.sh
+++ b/egs/ami/s5/local/ami_xml2text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright, University of Edinburgh (Pawel Swietojanski and Jonathan Kilgour)
diff --git a/egs/ami/s5/local/beamformit.sh b/egs/ami/s5/local/beamformit.sh
index f50716d8872..563b303ecc0 100755
--- a/egs/ami/s5/local/beamformit.sh
+++ b/egs/ami/s5/local/beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5/local/chain/run_blstm_ami_5.sh b/egs/ami/s5/local/chain/run_blstm_ami_5.sh
index 53221a2bd53..aade87d9a61 100755
--- a/egs/ami/s5/local/chain/run_blstm_ami_5.sh
+++ b/egs/ami/s5/local/chain/run_blstm_ami_5.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###
diff --git a/egs/ami/s5/local/chain/run_chain_common.sh b/egs/ami/s5/local/chain/run_chain_common.sh
index a7ed2a8dbab..f74ba71e2a0 100755
--- a/egs/ami/s5/local/chain/run_chain_common.sh
+++ b/egs/ami/s5/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across AMI chain recipes
set -e
diff --git a/egs/ami/s5/local/chain/run_tdnn_ami_5.sh b/egs/ami/s5/local/chain/run_tdnn_ami_5.sh
index df635316127..b63987d1534 100755
--- a/egs/ami/s5/local/chain/run_tdnn_ami_5.sh
+++ b/egs/ami/s5/local/chain/run_tdnn_ami_5.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#adapted from swbd's local/chain/6z.sh script. We change the TDNN config
# These are the other modifications:
diff --git a/egs/ami/s5/local/confidence_calibration.sh b/egs/ami/s5/local/confidence_calibration.sh
index d1217afe0d0..87be0061803 100755
--- a/egs/ami/s5/local/confidence_calibration.sh
+++ b/egs/ami/s5/local/confidence_calibration.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/ami/s5/local/nnet/prepare_ivectors.sh b/egs/ami/s5/local/nnet/prepare_ivectors.sh
index 5be120d600e..8b62bcc11bb 100755
--- a/egs/ami/s5/local/nnet/prepare_ivectors.sh
+++ b/egs/ami/s5/local/nnet/prepare_ivectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016, Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/ami/s5/local/nnet3/prepare_parallel_datadirs.sh b/egs/ami/s5/local/nnet3/prepare_parallel_datadirs.sh
index cbf47682b1e..189f1b69cd8 100755
--- a/egs/ami/s5/local/nnet3/prepare_parallel_datadirs.sh
+++ b/egs/ami/s5/local/nnet3/prepare_parallel_datadirs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script creates a new data directory data/$new_mic
# where the train, dev and eval directories are copied from $original_mic
diff --git a/egs/ami/s5/local/nnet3/prepare_parallel_perturbed_alignments.sh b/egs/ami/s5/local/nnet3/prepare_parallel_perturbed_alignments.sh
index 458d31c200a..520a2bc9d84 100755
--- a/egs/ami/s5/local/nnet3/prepare_parallel_perturbed_alignments.sh
+++ b/egs/ami/s5/local/nnet3/prepare_parallel_perturbed_alignments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script creates the parallel data dir based on ihm data,
# creates speed perturbed versions of this parallel data
diff --git a/egs/ami/s5/local/nnet3/prepare_perturbed_alignments.sh b/egs/ami/s5/local/nnet3/prepare_perturbed_alignments.sh
index 70c429041ca..f9fe0a85ab3 100755
--- a/egs/ami/s5/local/nnet3/prepare_perturbed_alignments.sh
+++ b/egs/ami/s5/local/nnet3/prepare_perturbed_alignments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script creates speed perturbed versions of the training data
# and generates the corresponding alignments
diff --git a/egs/ami/s5/local/nnet3/run_ivector_common.sh b/egs/ami/s5/local/nnet3/run_ivector_common.sh
index 649f87f33d8..6eedd3df00d 100755
--- a/egs/ami/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/ami/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script contains some common (shared) parts of the run_nnet*.sh scripts.
# speed perturbation is done for the training data
diff --git a/egs/ami/s5/local/nnet3/run_lstm.sh b/egs/ami/s5/local/nnet3/run_lstm.sh
index b920482252a..d0b3aec1a3c 100755
--- a/egs/ami/s5/local/nnet3/run_lstm.sh
+++ b/egs/ami/s5/local/nnet3/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (Author: Daniel Povey).
# 2015 Vijayaditya Peddinti
diff --git a/egs/ami/s5/local/nnet3/run_tdnn.sh b/egs/ami/s5/local/nnet3/run_tdnn.sh
index 2175d3bcc66..6c2dd913670 100755
--- a/egs/ami/s5/local/nnet3/run_tdnn.sh
+++ b/egs/ami/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the standard "tdnn" system, built in nnet3; it's what we use to
# call multi-splice.
diff --git a/egs/ami/s5/local/online/run_nnet2_common.sh b/egs/ami/s5/local/online/run_nnet2_common.sh
index d03c491f805..ccaeb024e36 100755
--- a/egs/ami/s5/local/online/run_nnet2_common.sh
+++ b/egs/ami/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script contains some common (shared) parts of the run_nnet*.sh scripts.
diff --git a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh
index a6c2d02b7af..0f229fb5e7e 100755
--- a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh
+++ b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (author: Daniel Povey)
# 2014 Tom Ko
diff --git a/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh b/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh
index 9b8d7effd95..7811ef889f6 100755
--- a/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh
+++ b/egs/ami/s5/local/online/run_nnet2_ms_sp_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script does discriminative training on top of the online, multi-splice
diff --git a/egs/ami/s5/local/score.sh b/egs/ami/s5/local/score.sh
index 6a077c39644..9819a0f56cc 100755
--- a/egs/ami/s5/local/score.sh
+++ b/egs/ami/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012
# Copyright University of Edinburgh (Author: Pawel Swietojanski) 2014
diff --git a/egs/ami/s5/local/score_asclite.sh b/egs/ami/s5/local/score_asclite.sh
index 741591005a5..4c937a94ef2 100755
--- a/egs/ami/s5/local/score_asclite.sh
+++ b/egs/ami/s5/local/score_asclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# 2014, University of Edinburgh, (Author: Pawel Swietojanski)
# 2015, Brno University of Technology (Author: Karel Vesely)
diff --git a/egs/ami/s5/local/tfrnnlm/rnnlm_data_prep.sh b/egs/ami/s5/local/tfrnnlm/rnnlm_data_prep.sh
index 3456a77ca55..de5fa8ee7ff 100755
--- a/egs/ami/s5/local/tfrnnlm/rnnlm_data_prep.sh
+++ b/egs/ami/s5/local/tfrnnlm/rnnlm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script prepares the data directory used for TensorFlow based RNNLM traiing
# it prepares the following files in the output-directory
diff --git a/egs/ami/s5/local/tfrnnlm/run_lstm.sh b/egs/ami/s5/local/tfrnnlm/run_lstm.sh
index d68fadb10f3..58986991271 100755
--- a/egs/ami/s5/local/tfrnnlm/run_lstm.sh
+++ b/egs/ami/s5/local/tfrnnlm/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
mic=ihm
ngram_order=4 # this option when used, the rescoring binary makes an approximation
# to merge the states of the FST generated from RNNLM. e.g. if ngram-order = 4
diff --git a/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh b/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
index 4cc71b55b5c..ae4f26e9cc4 100755
--- a/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
+++ b/egs/ami/s5/local/tfrnnlm/run_lstm_fast.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
mic=ihm
ngram_order=3 # this option when used, the rescoring binary makes an approximation
# to merge the states of the FST generated from RNNLM. e.g. if ngram-order = 4
diff --git a/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh b/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh
index 7a95f38ba1e..32b6e0ae2c7 100755
--- a/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh
+++ b/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
mic=ihm
ngram_order=4 # this option when used, the rescoring binary makes an approximation
# to merge the states of the FST generated from RNNLM. e.g. if ngram-order = 4
diff --git a/egs/ami/s5b/local/ami_beamform.sh b/egs/ami/s5b/local/ami_beamform.sh
index 3397bcf2ab0..ea8ec02af52 100755
--- a/egs/ami/s5b/local/ami_beamform.sh
+++ b/egs/ami/s5b/local/ami_beamform.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# Apache 2.0
diff --git a/egs/ami/s5b/local/ami_download.sh b/egs/ami/s5b/local/ami_download.sh
index ef7b684df2b..bae72d1716a 100755
--- a/egs/ami/s5b/local/ami_download.sh
+++ b/egs/ami/s5b/local/ami_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski, Jonathan Kilgour)
# 2015 Brno University of Technology (Author: Karel Vesely)
@@ -59,7 +59,7 @@ wgetfile=$wdir/wget_$mic.sh
manifest="wget --continue -O $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-04237-Sun-Jun-15-2014.manifest.txt"
license="wget --continue -O $adir/LICENCE.TXT http://groups.inf.ed.ac.uk/ami/download/temp/Creative-Commons-Attribution-NonCommercial-ShareAlike-2.5.txt"
-echo "#!/bin/bash" > $wgetfile
+echo "#!/usr/bin/env bash" > $wgetfile
echo $manifest >> $wgetfile
echo $license >> $wgetfile
while read line; do
diff --git a/egs/ami/s5b/local/ami_format_data.sh b/egs/ami/s5b/local/ami_format_data.sh
index b69583850ab..c8c5ff0946d 100755
--- a/egs/ami/s5b/local/ami_format_data.sh
+++ b/egs/ami/s5b/local/ami_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/ami/s5b/local/ami_ihm_data_prep.sh b/egs/ami/s5b/local/ami_ihm_data_prep.sh
index 8ffa1f1e9c5..04cc6a4a68e 100755
--- a/egs/ami/s5b/local/ami_ihm_data_prep.sh
+++ b/egs/ami/s5b/local/ami_ihm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
# 2016 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh
index c54876331f1..2fe5a9db33d 100755
--- a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh
+++ b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5b/local/ami_mdm_data_prep.sh b/egs/ami/s5b/local/ami_mdm_data_prep.sh
index d100347a356..a7a0d4fbb31 100755
--- a/egs/ami/s5b/local/ami_mdm_data_prep.sh
+++ b/egs/ami/s5b/local/ami_mdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh
index 475ef5405ba..051079b0c1d 100755
--- a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh
+++ b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
# 2016 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/ami/s5b/local/ami_prepare_dict.sh b/egs/ami/s5b/local/ami_prepare_dict.sh
index 1834cfd112b..26f75e83e1d 100755
--- a/egs/ami/s5b/local/ami_prepare_dict.sh
+++ b/egs/ami/s5b/local/ami_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#adapted from fisher dict preparation script, Author: Pawel Swietojanski
diff --git a/egs/ami/s5b/local/ami_sdm_data_prep.sh b/egs/ami/s5b/local/ami_sdm_data_prep.sh
index 327595070a6..9099b3d9a9e 100755
--- a/egs/ami/s5b/local/ami_sdm_data_prep.sh
+++ b/egs/ami/s5b/local/ami_sdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh
index 580880818fc..d0711b9d71e 100755
--- a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh
+++ b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
# 2016 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/ami/s5b/local/ami_text_prep.sh b/egs/ami/s5b/local/ami_text_prep.sh
index 9170c6729ea..3dbe37a8f4e 100755
--- a/egs/ami/s5b/local/ami_text_prep.sh
+++ b/egs/ami/s5b/local/ami_text_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Brno University of Technology (Author: Karel Vesely)
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski), 2014, Apache 2.0
diff --git a/egs/ami/s5b/local/ami_train_lms.sh b/egs/ami/s5b/local/ami_train_lms.sh
index 104b4ac5dd8..652eb37d20f 100755
--- a/egs/ami/s5b/local/ami_train_lms.sh
+++ b/egs/ami/s5b/local/ami_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal, Pawel Swietojanski
diff --git a/egs/ami/s5b/local/ami_xml2text.sh b/egs/ami/s5b/local/ami_xml2text.sh
index c4b90a33702..6ccf28c12b8 100755
--- a/egs/ami/s5b/local/ami_xml2text.sh
+++ b/egs/ami/s5b/local/ami_xml2text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright, University of Edinburgh (Pawel Swietojanski and Jonathan Kilgour)
diff --git a/egs/ami/s5b/local/beamformit.sh b/egs/ami/s5b/local/beamformit.sh
index f50716d8872..563b303ecc0 100755
--- a/egs/ami/s5b/local/beamformit.sh
+++ b/egs/ami/s5b/local/beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5b/local/chain/compare_wer_general.sh b/egs/ami/s5b/local/chain/compare_wer_general.sh
index 73118bf198d..808b26d0fd0 100755
--- a/egs/ami/s5b/local/chain/compare_wer_general.sh
+++ b/egs/ami/s5b/local/chain/compare_wer_general.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
mic=$1;
shift;
diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh
index 4d260e3c517..586398ce085 100755
--- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh
+++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on swbd 7q TDNN-F recipe
# with resnet-style skip connections, more layers,
diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh
index 3546b6a7ced..f2ab59abf86 100755
--- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a chain-training script with TDNN+LSTM neural networks.
# This script is based on local/chain/tuning/run_tdnn_lstm_1i.sh, but adding
diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh
index 1a839b045bd..f5190f2026b 100755
--- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a chain-training script with TDNN+LSTM neural networks.
# This script is similar to local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh,
diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
index d926c1dc6d7..8f656fa6b82 100644
--- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# cnn_tdnn_lstm_1a is based on tdnn_lstm_1j, but adding the cnn front end, and
# replacing all renorm in tdnn layers with batchnorm
diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh
index d9cd1c356e8..d9c4620f27b 100644
--- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# cnn_tdnn_lstm_1b is based on cnn_tdnn_lstm_1a, but adding dropout and
# proportional-shrink with value 5
diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh
index a0805b4f9f1..5b4cc5b3d4a 100755
--- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# cnn_tdnn_lstm_1c is based on cnn_tdnn_lstm_1b, but using smaller dropout-schedule
# and larger decay-time option(40).
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh
index 03ebc5845e4..6bb2698acb9 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a chain-training script with TDNN neural networks.
# Please see RESULTS_* for examples of command lines invoking this script.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh
index 997357b80a9..4e2fb5d3070 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a chain-training script with TDNN neural networks.
# Please see RESULTS_* for examples of command lines invoking this script.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh
index 4d062e65429..6ada60c7047 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1b but with shorter minibatches
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
index 387570388d0..8d3f1ab95d7 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1b but uses PCA instead of
# LDA features for the ivector extractor.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh
index 0436b08cdc0..b8497c809c4 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1b but uses batchnorm components instead of renorm
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
index 4ca526d63b8..3e041d4a4cc 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1e but uses batchnorm components instead of renorm also adding
# proportional-shrink 10, trained with 4 epochs
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
index baed760bb68..dcf1d7b03a4 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1e but uses batchnorm components instead of renorm also adding
# proportional-shrink 10, trained with 6 epochs
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh
index e721a858c0a..aca5b26b69b 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1g but adding two non-splicing layers towards the beginning
# of the network, trained with 9 epochs.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
index de40cb2d1a4..89390de6690 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1h but replacing proportional-shrink with l2-regularize.
# The results match those from 1h.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
index 80b2aee60e9..c6a12b1f4f9 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1j is same as swbd 7q. It uses modified topology with resnet-style skip connections, more layers,
# skinnier bottlenecks.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
index 4f580b88f6b..f6c65cc0826 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# TDNN+LSTM architecture similar to swbd/tdnn_lstm_1b
# results on sdm1 with ihm ali
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh
index 904a079d7de..3576179baa9 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1a but the neural network has two more TDNN layers (0,3 0,3)
# above the lstm
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh
index 511e520465a..dc2705ca577 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1a, but with more TDNN layers between each LSTM
# results on sdm1 with ihm ali
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh
index bd81b7df4eb..0a30490c9e7 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1c, but with more TDNN layers between each LSTM
# results on sdm1 with ihm ali
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh
index 50903e78b6d..f41b83fd448 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1c but with only right context for the TDNNs i.e., (0,3) in place
# of (-3,0,3)
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh
index f6c53001498..78653cd867b 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1a but the neural network has two more TDNN layers (0,3 0,3)
# above the lstm
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh
index 79fd9ef3fb5..8cd21a28715 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1c but with smaller minibatch
# using smaller minibatches seems to be better in TDNN+LSTM archs.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh
index e58a7f89e03..e0ac6fc7e8f 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1c but with one more stack of TDNN and LSTM layers
# results on sdm1 using ihm ali
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh
index 13f894f5a48..b567bcb4527 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1g but with TDNN output dim 1024 instead of 512
# (num-params 1g:21309812 1i: 43447156)
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh
index 48b31832e8c..806b305a847 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1j is same as 1i but with changes related to fast-lstmp layer
# changed num-chunk-per-minibatch to be variable
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh
index e675bc494bb..63430d903c1 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1k is same as 1j but with smaller delay on the first lstm layer
# there is a 37% increase in training time 11hrs vs 8hrs and the gains are modest
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh
index 2d019398274..5ab8333b043 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This (1l.sh) is the same as 1i but with per-frame dropout on LSTM layer
# It is a regular (non-fast) LSTM with per-frame dropout on [i, f, o] gates of the LSTM,
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh
index 9e5b971bbe2..a86bab5055a 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This (1m.sh) is the same as 1j but with per-frame dropout on LSTM layer
# It is a fast LSTM with per-frame dropout on [i, f, o] gates of the LSTM,
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
index 9575c3cf686..ab3354675e1 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1i but with batchnorm replacing all renorm in TDNN
# and using proportional-shrink with value 10, this model uses
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh
index a7f2625c181..c260601be75 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as 1n but replacing proportional-shrink with l2-regularize.
# Also applied similar changes from 1i to 1j:
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh
index ca920869b30..58c11f4238e 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# same as tdnn_lstm_1o but use backstitch training.
# Also num-epochs and l2-regularize are tuned for best performance.
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh
index 53dbd5238db..9fd2006aa03 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh
index dafef668e60..3948cf39566 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh
index 677946d0b9a..5bc025d90ef 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/ami/s5b/local/nnet3/multi_condition/run_ivector_common.sh b/egs/ami/s5b/local/nnet3/multi_condition/run_ivector_common.sh
index 5ba35fa421c..1112a7a2968 100755
--- a/egs/ami/s5b/local/nnet3/multi_condition/run_ivector_common.sh
+++ b/egs/ami/s5b/local/nnet3/multi_condition/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/ami/s5b/local/nnet3/prepare_lores_feats.sh b/egs/ami/s5b/local/nnet3/prepare_lores_feats.sh
index efa0046bd62..b98abe32eca 100755
--- a/egs/ami/s5b/local/nnet3/prepare_lores_feats.sh
+++ b/egs/ami/s5b/local/nnet3/prepare_lores_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/ami/s5b/local/nnet3/run_ivector_common.sh b/egs/ami/s5b/local/nnet3/run_ivector_common.sh
index e67d1039c40..7da982d49f9 100755
--- a/egs/ami/s5b/local/nnet3/run_ivector_common.sh
+++ b/egs/ami/s5b/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/ami/s5b/local/nnet3/run_lstm.sh b/egs/ami/s5b/local/nnet3/run_lstm.sh
index c5583e2d0ef..9b544706d36 100755
--- a/egs/ami/s5b/local/nnet3/run_lstm.sh
+++ b/egs/ami/s5b/local/nnet3/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the standard "lstm" system, built in nnet3.
# Please see RESULTS_* for examples of command lines invoking this script.
diff --git a/egs/ami/s5b/local/nnet3/run_tdnn.sh b/egs/ami/s5b/local/nnet3/run_tdnn.sh
index cc6b60696b1..a2af870c8a1 100755
--- a/egs/ami/s5b/local/nnet3/run_tdnn.sh
+++ b/egs/ami/s5b/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the standard "tdnn" system, built in nnet3.
# Please see RESULTS_* for examples of command lines invoking this script.
diff --git a/egs/ami/s5b/local/prepare_parallel_train_data.sh b/egs/ami/s5b/local/prepare_parallel_train_data.sh
index ad22ad7cf22..63b303d0a85 100755
--- a/egs/ami/s5b/local/prepare_parallel_train_data.sh
+++ b/egs/ami/s5b/local/prepare_parallel_train_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script creates a new data directory data/sdm1/train_cleanali or
# data/mdm8/train_cleanali which has the segment ids from (e.g.) data/sdm1/train
diff --git a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1a.sh b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
index 0c38955cc32..3b09e3dfbde 100755
--- a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
+++ b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1b.sh b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
index eca8421b0f2..e892bb483f9 100755
--- a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
+++ b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
index 00a6edb8125..769d1e00bc9 100755
--- a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
+++ b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/ami/s5b/local/run_cleanup_segmentation.sh b/egs/ami/s5b/local/run_cleanup_segmentation.sh
index e2f0b0516ce..81d1fce9721 100755
--- a/egs/ami/s5b/local/run_cleanup_segmentation.sh
+++ b/egs/ami/s5b/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/ami/s5b/local/score.sh b/egs/ami/s5b/local/score.sh
index 6a077c39644..9819a0f56cc 100755
--- a/egs/ami/s5b/local/score.sh
+++ b/egs/ami/s5b/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012
# Copyright University of Edinburgh (Author: Pawel Swietojanski) 2014
diff --git a/egs/ami/s5b/local/score_asclite.sh b/egs/ami/s5b/local/score_asclite.sh
index 7327f6246af..ad6243a6176 100755
--- a/egs/ami/s5b/local/score_asclite.sh
+++ b/egs/ami/s5b/local/score_asclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# 2014, University of Edinburgh, (Author: Pawel Swietojanski)
# 2015, Brno University of Technology (Author: Karel Vesely)
diff --git a/egs/ami/s5b/run.sh b/egs/ami/s5b/run.sh
index eacc69a6845..79989f17004 100755
--- a/egs/ami/s5b/run.sh
+++ b/egs/ami/s5b/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/an4/s5/local/download_and_untar.sh b/egs/an4/s5/local/download_and_untar.sh
index 81919284da7..ec55749768d 100755
--- a/egs/an4/s5/local/download_and_untar.sh
+++ b/egs/an4/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/an4/s5/run.sh b/egs/an4/s5/run.sh
index eef699edef7..dd9bbe03732 100755
--- a/egs/an4/s5/run.sh
+++ b/egs/an4/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
diff --git a/egs/apiai_decode/s5/download-model.sh b/egs/apiai_decode/s5/download-model.sh
index 11a00cb0979..4af6ad58b3d 100755
--- a/egs/apiai_decode/s5/download-model.sh
+++ b/egs/apiai_decode/s5/download-model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Downlaods Api.ai chain model into exp/api.ai-model/ (will replace one if exists)
DOWNLOAD_URL="https://github.com/api-ai/api-ai-english-asr-model/releases/download/1.0/api.ai-kaldi-asr-model.zip"
diff --git a/egs/apiai_decode/s5/local/create-corpus.sh b/egs/apiai_decode/s5/local/create-corpus.sh
index 8071aa226de..8f023d842b7 100755
--- a/egs/apiai_decode/s5/local/create-corpus.sh
+++ b/egs/apiai_decode/s5/local/create-corpus.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Checking arguments
if [ $# -le 1 ]; then
diff --git a/egs/apiai_decode/s5/recognize-wav.sh b/egs/apiai_decode/s5/recognize-wav.sh
index d76b6293642..c2049bcdb11 100755
--- a/egs/apiai_decode/s5/recognize-wav.sh
+++ b/egs/apiai_decode/s5/recognize-wav.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Api.ai (Author: Ilya Platonov)
# Apache 2.0
diff --git a/egs/aspire/s5/local/build_silprob.sh b/egs/aspire/s5/local/build_silprob.sh
index fbba50990c6..d4367f7f0ed 100755
--- a/egs/aspire/s5/local/build_silprob.sh
+++ b/egs/aspire/s5/local/build_silprob.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/chain/compare_wer_general.sh b/egs/aspire/s5/local/chain/compare_wer_general.sh
index 7b85dc373e0..73627bd585c 100755
--- a/egs/aspire/s5/local/chain/compare_wer_general.sh
+++ b/egs/aspire/s5/local/chain/compare_wer_general.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer_general.sh exp/chain/tdnn_7b exp/chain/tdnn_lstm_1a
diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
index bd13010c791..fce0e3ec40e 100755
--- a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh
index b5979a3ce6b..0447bebcec0 100755
--- a/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
index cd548142598..70972f7ae37 100755
--- a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
@@ -44,7 +44,7 @@ lang=data/lang_chain
# The iVector-extraction and feature-dumping parts are the same as the standard
# nnet3 setup, and you can skip them by setting "--stage 8" if you have already
# run those things.
-local/nnet3/run_ivector_common.sh --stage $stage --num-data-reps 3|| exit 1;
+local/nnet3/run_ivector_common.sh --stage $stage --num-data-reps ${num_data_reps} || exit 1;
if [ $stage -le 7 ]; then
# Create a version of the lang/ directory that has one state per phone in the
@@ -92,8 +92,8 @@ if [ $stage -le 9 ]; then
# combine the non-hires features for alignments/lattices
rm -rf data/${latgen_train_set}_min${min_seg_len}
- utt_prefix="THISISUNIQUESTRING_"
- spk_prefix="THISISUNIQUESTRING_"
+ utt_prefix="THISISUNIQUESTRING-"
+ spk_prefix="THISISUNIQUESTRING-"
utils/copy_data_dir.sh --spk-prefix "$spk_prefix" --utt-prefix "$utt_prefix" \
data/train data/train_temp_for_lats
utils/data/combine_short_segments.sh \
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh
index 5b35c902354..22c7cc3a867 100755
--- a/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index f98dff5e6fa..eefd8cbccc2 100755
--- a/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/aspire/s5/local/extract_vad_weights.sh b/egs/aspire/s5/local/extract_vad_weights.sh
index 95e36ad12da..19f1bf037b4 100755
--- a/egs/aspire/s5/local/extract_vad_weights.sh
+++ b/egs/aspire/s5/local/extract_vad_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script converts lattices available from a first pass decode into a per-frame weights file
diff --git a/egs/aspire/s5/local/fisher_create_test_lang.sh b/egs/aspire/s5/local/fisher_create_test_lang.sh
index 6739de822aa..dfe590adf16 100755
--- a/egs/aspire/s5/local/fisher_create_test_lang.sh
+++ b/egs/aspire/s5/local/fisher_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/aspire/s5/local/fisher_data_prep.sh b/egs/aspire/s5/local/fisher_data_prep.sh
index f3ad3c3f5bd..900ee385768 100755
--- a/egs/aspire/s5/local/fisher_data_prep.sh
+++ b/egs/aspire/s5/local/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/aspire/s5/local/fisher_prepare_dict.sh b/egs/aspire/s5/local/fisher_prepare_dict.sh
index 577e2869c0b..c577ecf0c01 100755
--- a/egs/aspire/s5/local/fisher_prepare_dict.sh
+++ b/egs/aspire/s5/local/fisher_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/aspire/s5/local/fisher_train_lms.sh b/egs/aspire/s5/local/fisher_train_lms.sh
index d338b82adef..bd2fddc3ac0 100755
--- a/egs/aspire/s5/local/fisher_train_lms.sh
+++ b/egs/aspire/s5/local/fisher_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/aspire/s5/local/generate_uniformly_segmented_data_dir.sh b/egs/aspire/s5/local/generate_uniformly_segmented_data_dir.sh
index 2ceb4a4cf05..4100f500bab 100755
--- a/egs/aspire/s5/local/generate_uniformly_segmented_data_dir.sh
+++ b/egs/aspire/s5/local/generate_uniformly_segmented_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Vijayaditya Peddinti, 2016.
# Apache 2.0.
diff --git a/egs/aspire/s5/local/lattice_to_ctm.sh b/egs/aspire/s5/local/lattice_to_ctm.sh
index e5c88510ac8..aa882de5484 100755
--- a/egs/aspire/s5/local/lattice_to_ctm.sh
+++ b/egs/aspire/s5/local/lattice_to_ctm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/aspire/s5/local/multi_condition/aspire_data_prep.sh b/egs/aspire/s5/local/multi_condition/aspire_data_prep.sh
index 6dd344463ba..b2d988cd2b3 100755
--- a/egs/aspire/s5/local/multi_condition/aspire_data_prep.sh
+++ b/egs/aspire/s5/local/multi_condition/aspire_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti)
# Apache 2.0.
set -e
diff --git a/egs/aspire/s5/local/multi_condition/check_version.sh b/egs/aspire/s5/local/multi_condition/check_version.sh
index 81c415a3d67..d432b4c3835 100755
--- a/egs/aspire/s5/local/multi_condition/check_version.sh
+++ b/egs/aspire/s5/local/multi_condition/check_version.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script to check the tool versions necessary for the aspire recipe
function check_for_bad_sox {
diff --git a/egs/aspire/s5/local/multi_condition/copy_ali_dir.sh b/egs/aspire/s5/local/multi_condition/copy_ali_dir.sh
index 42ea2dc4b9d..9c7dc1637a3 100755
--- a/egs/aspire/s5/local/multi_condition/copy_ali_dir.sh
+++ b/egs/aspire/s5/local/multi_condition/copy_ali_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
diff --git a/egs/aspire/s5/local/multi_condition/decode.sh b/egs/aspire/s5/local/multi_condition/decode.sh
index b09c4780e71..538e581c169 100755
--- a/egs/aspire/s5/local/multi_condition/decode.sh
+++ b/egs/aspire/s5/local/multi_condition/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey).
# 2014 Vijayaditya Peddinti
diff --git a/egs/aspire/s5/local/multi_condition/prep_test_aspire.sh b/egs/aspire/s5/local/multi_condition/prep_test_aspire.sh
index 14cc9a9b04f..3ac2b29d780 100755
--- a/egs/aspire/s5/local/multi_condition/prep_test_aspire.sh
+++ b/egs/aspire/s5/local/multi_condition/prep_test_aspire.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2015. Apache 2.0.
# This script generates the ctm files for dev_aspire, test_aspire and eval_aspire
# for scoring with ASpIRE scoring server.
diff --git a/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh b/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh
index 8297cdee9ca..b94e8b7b344 100755
--- a/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh
+++ b/egs/aspire/s5/local/multi_condition/prepare_impulses_noises.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# set -e
# Copyright 2014 Johns Hopkins University (Author: Vijayaditya Peddinti)
diff --git a/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh b/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh
index f637c69f7c7..1e6482b8503 100755
--- a/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh
+++ b/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Vijayaditya Peddinti)
# 2015 Tom Ko
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_aalto.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_aalto.sh
index 381a809744c..4e3ff242e08 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_aalto.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_aalto.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the Concert Hall Impulse Responses - Pori, Finland
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_air.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_air.sh
index 731c9e84317..3d57751934e 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_air.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_air.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the Aachen impulse response database
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_c4dm.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_c4dm.sh
index be1628385f4..ff2ae6eee55 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_c4dm.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_c4dm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# Room impulse responses from Center for Digital Music, Queen Mary University of London
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_mardy.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_mardy.sh
index df365c9c134..2c77a71a022 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_mardy.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_mardy.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads Multichannel Acoustic Reverberation Database at
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_openair.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_openair.sh
index a897671213b..8cdfe596dda 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_openair.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_openair.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the impulse responses from http://www.openairlib.net/
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_rvb2014.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_rvb2014.sh
index 25617b22fdb..346bc8bf785 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_rvb2014.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_rvb2014.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the impulse responses and noise files from the
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_rwcp.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_rwcp.sh
index f755a725fe4..c28e6f79952 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_rwcp.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_rwcp.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the RWCP impulse responses and ambient noise
diff --git a/egs/aspire/s5/local/multi_condition/rirs/prep_varechoic.sh b/egs/aspire/s5/local/multi_condition/rirs/prep_varechoic.sh
index 6e2956e9a37..ee2ee848aa8 100755
--- a/egs/aspire/s5/local/multi_condition/rirs/prep_varechoic.sh
+++ b/egs/aspire/s5/local/multi_condition/rirs/prep_varechoic.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads the impulse responses from the Varechoic room
diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
index 9345dfc92ef..f8a201eef46 100755
--- a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
+++ b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#set -e
# this script is based on local/online/run_nnet2_comman.sh
# but it operates on corrupted training/dev/test data sets
diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh
index 56b2de399f2..e9cb025bb5d 100755
--- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh
+++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the "multi-splice" version of the online-nnet2 training script.
# It's currently the best recipe for aspire.
diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh
index 129b1402cf4..43e5108019c 100755
--- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh
+++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is run_nnet2_ms_disc.sh but with 4 jobs not 2 (and double the learning rate).
diff --git a/egs/aspire/s5/local/nnet3/decode.sh b/egs/aspire/s5/local/nnet3/decode.sh
index 8f965c51cf1..1eb599441fb 100755
--- a/egs/aspire/s5/local/nnet3/decode.sh
+++ b/egs/aspire/s5/local/nnet3/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script generates the ctm files for dev_aspire, test_aspire and eval_aspire
diff --git a/egs/aspire/s5/local/nnet3/decode_online.sh b/egs/aspire/s5/local/nnet3/decode_online.sh
index 8a51e36b0a5..1e005820c59 100755
--- a/egs/aspire/s5/local/nnet3/decode_online.sh
+++ b/egs/aspire/s5/local/nnet3/decode_online.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script does online decoding, unlike local/nnet3/decode.sh which does 2-pass decoding with
diff --git a/egs/aspire/s5/local/nnet3/run_autoencoder.sh b/egs/aspire/s5/local/nnet3/run_autoencoder.sh
index 3d16a97ab85..e1c94327cb0 100755
--- a/egs/aspire/s5/local/nnet3/run_autoencoder.sh
+++ b/egs/aspire/s5/local/nnet3/run_autoencoder.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is an example to show a "tdnn" system in raw nnet configuration
# i.e. without a transition model
diff --git a/egs/aspire/s5/local/nnet3/run_blstm.sh b/egs/aspire/s5/local/nnet3/run_blstm.sh
index 8fe53fa4db1..87dde580333 100755
--- a/egs/aspire/s5/local/nnet3/run_blstm.sh
+++ b/egs/aspire/s5/local/nnet3/run_blstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# based on egs/fisher_swbd/s5/local/nnet3/run_lstm.sh
diff --git a/egs/aspire/s5/local/nnet3/run_ivector_common.sh b/egs/aspire/s5/local/nnet3/run_ivector_common.sh
index ea226c230af..e867933477a 100755
--- a/egs/aspire/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/aspire/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#set -e
# this script is based on local/multicondition/run_nnet2_common.sh
# minor corrections were made to dir names for nnet3
diff --git a/egs/aspire/s5/local/nnet3/run_tdnn.sh b/egs/aspire/s5/local/nnet3/run_tdnn.sh
index 8e6a45ccbb4..f32c2a76038 100755
--- a/egs/aspire/s5/local/nnet3/run_tdnn.sh
+++ b/egs/aspire/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is a script to train the nnet3 TDNN acoustic model
diff --git a/egs/aspire/s5/local/nnet3/segment_and_decode.sh b/egs/aspire/s5/local/nnet3/segment_and_decode.sh
index e8917d091e2..80394ae15f3 100755
--- a/egs/aspire/s5/local/nnet3/segment_and_decode.sh
+++ b/egs/aspire/s5/local/nnet3/segment_and_decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script generates the ctm files for dev_aspire, test_aspire and eval_aspire
diff --git a/egs/aspire/s5/local/run_asr_segmentation.sh b/egs/aspire/s5/local/run_asr_segmentation.sh
index 095e47e99de..47977597c9b 100755
--- a/egs/aspire/s5/local/run_asr_segmentation.sh
+++ b/egs/aspire/s5/local/run_asr_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2017 Vimal Manohar
diff --git a/egs/aspire/s5/local/run_data_cleaning.sh b/egs/aspire/s5/local/run_data_cleaning.sh
index 68b752ad577..7154da73e27 100755
--- a/egs/aspire/s5/local/run_data_cleaning.sh
+++ b/egs/aspire/s5/local/run_data_cleaning.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script shows how you can do data-cleaning, and exclude data that has a
diff --git a/egs/aspire/s5/local/score.sh b/egs/aspire/s5/local/score.sh
index 91f8e77bc99..65e0adf0fd4 100755
--- a/egs/aspire/s5/local/score.sh
+++ b/egs/aspire/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/aspire/s5/local/score_aspire.sh b/egs/aspire/s5/local/score_aspire.sh
index 9c08a6c85d1..d9f7762cb08 100755
--- a/egs/aspire/s5/local/score_aspire.sh
+++ b/egs/aspire/s5/local/score_aspire.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script generates the ctm files, filters and scores them if an stm file is available
diff --git a/egs/aspire/s5/local/score_stm.sh b/egs/aspire/s5/local/score_stm.sh
index 7f559f7dd79..15257491eeb 100755
--- a/egs/aspire/s5/local/score_stm.sh
+++ b/egs/aspire/s5/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_asr_sad_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
index 438cd1f1d5e..8c9d521592b 100755
--- a/egs/aspire/s5/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
+++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2018 Vimal Manohar
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index 80f9840f160..c789ff11630 100755
--- a/egs/aspire/s5/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2018 Vimal Manohar
diff --git a/egs/aspire/s5/run.sh b/egs/aspire/s5/run.sh
index 851363a7532..ee7b7762383 100755
--- a/egs/aspire/s5/run.sh
+++ b/egs/aspire/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# ASpIRE submission, based on Fisher-english GMM-HMM system
# (March 2015)
diff --git a/egs/aurora4/s5/local/aurora4_data_prep.sh b/egs/aurora4/s5/local/aurora4_data_prep.sh
index 6a42c9e543c..7c669b258ce 100755
--- a/egs/aurora4/s5/local/aurora4_data_prep.sh
+++ b/egs/aurora4/s5/local/aurora4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/aurora4/s5/local/aurora4_format_data.sh b/egs/aurora4/s5/local/aurora4_format_data.sh
index 0b94f7f796d..668fc8e6305 100755
--- a/egs/aurora4/s5/local/aurora4_format_data.sh
+++ b/egs/aurora4/s5/local/aurora4_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/aurora4/s5/local/chain/compare_wer.sh b/egs/aurora4/s5/local/chain/compare_wer.sh
index 91701cad9e9..5f47c11420f 100755
--- a/egs/aurora4/s5/local/chain/compare_wer.sh
+++ b/egs/aurora4/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/aurora4/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aurora4/s5/local/chain/tuning/run_tdnn_1a.sh
index 8bc69f9c8cf..84f6eab457d 100755
--- a/egs/aurora4/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aurora4/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1a is same as 1h setup in WSJ
diff --git a/egs/aurora4/s5/local/cstr_wsj_data_prep.sh b/egs/aurora4/s5/local/cstr_wsj_data_prep.sh
index 35582646d95..a5c327ec34c 100755
--- a/egs/aurora4/s5/local/cstr_wsj_data_prep.sh
+++ b/egs/aurora4/s5/local/cstr_wsj_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/aurora4/s5/local/cstr_wsj_extend_dict.sh b/egs/aurora4/s5/local/cstr_wsj_extend_dict.sh
index b2a9faad704..9447cd1249b 100755
--- a/egs/aurora4/s5/local/cstr_wsj_extend_dict.sh
+++ b/egs/aurora4/s5/local/cstr_wsj_extend_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script builds a larger word-list and dictionary
# than used for the LMs supplied with the WSJ corpus.
diff --git a/egs/aurora4/s5/local/generate_example_kws.sh b/egs/aurora4/s5/local/generate_example_kws.sh
index 2c849438192..ecba20efbf1 100755
--- a/egs/aurora4/s5/local/generate_example_kws.sh
+++ b/egs/aurora4/s5/local/generate_example_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/aurora4/s5/local/kws_data_prep.sh b/egs/aurora4/s5/local/kws_data_prep.sh
index 5222a88c9ef..fecfda52473 100755
--- a/egs/aurora4/s5/local/kws_data_prep.sh
+++ b/egs/aurora4/s5/local/kws_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/aurora4/s5/local/nnet/run_dnn.sh b/egs/aurora4/s5/local/nnet/run_dnn.sh
index 680a6ca31f0..5deb2c805bc 100755
--- a/egs/aurora4/s5/local/nnet/run_dnn.sh
+++ b/egs/aurora4/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/aurora4/s5/local/nnet2/run_5b.sh b/egs/aurora4/s5/local/nnet2/run_5b.sh
index 676f340ecd1..6666eee769c 100755
--- a/egs/aurora4/s5/local/nnet2/run_5b.sh
+++ b/egs/aurora4/s5/local/nnet2/run_5b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/aurora4/s5/local/nnet2/run_5c.sh b/egs/aurora4/s5/local/nnet2/run_5c.sh
index 27f0db36f67..10b658b6da6 100755
--- a/egs/aurora4/s5/local/nnet2/run_5c.sh
+++ b/egs/aurora4/s5/local/nnet2/run_5c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is neural net training on top of adapted 40-dimensional features.
#
diff --git a/egs/aurora4/s5/local/nnet3/run_ivector_common.sh b/egs/aurora4/s5/local/nnet3/run_ivector_common.sh
index a489a273c6b..e13ed59b60e 100755
--- a/egs/aurora4/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/aurora4/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/aurora4/s5/local/run_basis_fmllr.sh b/egs/aurora4/s5/local/run_basis_fmllr.sh
index 4e412535dd5..f7ee77b5506 100755
--- a/egs/aurora4/s5/local/run_basis_fmllr.sh
+++ b/egs/aurora4/s5/local/run_basis_fmllr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/aurora4/s5/local/run_mmi_tri2b.sh b/egs/aurora4/s5/local/run_mmi_tri2b.sh
index 8a4d03c59c4..22b670c144e 100755
--- a/egs/aurora4/s5/local/run_mmi_tri2b.sh
+++ b/egs/aurora4/s5/local/run_mmi_tri2b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/aurora4/s5/local/run_mmi_tri4b.sh b/egs/aurora4/s5/local/run_mmi_tri4b.sh
index db34f8e1d84..cac895720a6 100755
--- a/egs/aurora4/s5/local/run_mmi_tri4b.sh
+++ b/egs/aurora4/s5/local/run_mmi_tri4b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
steps/make_denlats.sh --nj 30 --sub-split 30 --cmd "$train_cmd" \
diff --git a/egs/aurora4/s5/local/run_nnet_cpu.sh b/egs/aurora4/s5/local/run_nnet_cpu.sh
index c72e521f18b..8693d7f7619 100755
--- a/egs/aurora4/s5/local/run_nnet_cpu.sh
+++ b/egs/aurora4/s5/local/run_nnet_cpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/aurora4/s5/local/run_raw_fmllr.sh b/egs/aurora4/s5/local/run_raw_fmllr.sh
index c4847a93f27..26989eb6982 100644
--- a/egs/aurora4/s5/local/run_raw_fmllr.sh
+++ b/egs/aurora4/s5/local/run_raw_fmllr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/align_raw_fmllr.sh --nj 10 --cmd "$train_cmd" --use-graphs true \
diff --git a/egs/aurora4/s5/local/run_rnnlms_sgmm5b.sh b/egs/aurora4/s5/local/run_rnnlms_sgmm5b.sh
index 67fcee50a93..867294d2e77 100755
--- a/egs/aurora4/s5/local/run_rnnlms_sgmm5b.sh
+++ b/egs/aurora4/s5/local/run_rnnlms_sgmm5b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
for test in dev93 eval92; do
diff --git a/egs/aurora4/s5/local/run_rnnlms_tri3b.sh b/egs/aurora4/s5/local/run_rnnlms_tri3b.sh
index fac8842f960..32d5f55af91 100755
--- a/egs/aurora4/s5/local/run_rnnlms_tri3b.sh
+++ b/egs/aurora4/s5/local/run_rnnlms_tri3b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/aurora4/s5/local/run_sgmm2.sh b/egs/aurora4/s5/local/run_sgmm2.sh
index 2eb70785bcb..c129ff47f2e 100755
--- a/egs/aurora4/s5/local/run_sgmm2.sh
+++ b/egs/aurora4/s5/local/run_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is invoked from ../run.sh
# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.
diff --git a/egs/aurora4/s5/local/score.sh b/egs/aurora4/s5/local/score.sh
index abd8149a672..332f038c575 100755
--- a/egs/aurora4/s5/local/score.sh
+++ b/egs/aurora4/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/aurora4/s5/local/score_combine.sh b/egs/aurora4/s5/local/score_combine.sh
index 65caab06ecc..c4d3c13886a 100755
--- a/egs/aurora4/s5/local/score_combine.sh
+++ b/egs/aurora4/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal
diff --git a/egs/aurora4/s5/local/score_mbr.sh b/egs/aurora4/s5/local/score_mbr.sh
index 04b84ccce5a..8c752368906 100755
--- a/egs/aurora4/s5/local/score_mbr.sh
+++ b/egs/aurora4/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/aurora4/s5/local/wsj_prepare_dict.sh b/egs/aurora4/s5/local/wsj_prepare_dict.sh
index 2bbea907873..8bde9807c03 100755
--- a/egs/aurora4/s5/local/wsj_prepare_dict.sh
+++ b/egs/aurora4/s5/local/wsj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/aurora4/s5/run.sh b/egs/aurora4/s5/run.sh
index f7eb67580ae..730194fb8b9 100755
--- a/egs/aurora4/s5/run.sh
+++ b/egs/aurora4/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
diff --git a/egs/babel/s5/local/CHECKPOINT.sh b/egs/babel/s5/local/CHECKPOINT.sh
index 91b64d7fe1a..b8bdc48a9aa 100755
--- a/egs/babel/s5/local/CHECKPOINT.sh
+++ b/egs/babel/s5/local/CHECKPOINT.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
function GETAPPROVAL {
until false ; do
diff --git a/egs/babel/s5/local/arpa2G.sh b/egs/babel/s5/local/arpa2G.sh
index f037caf0d7b..4209388f61f 100755
--- a/egs/babel/s5/local/arpa2G.sh
+++ b/egs/babel/s5/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5/local/buildSRILM.sh b/egs/babel/s5/local/buildSRILM.sh
index 0633789a6a9..f113c322444 100755
--- a/egs/babel/s5/local/buildSRILM.sh
+++ b/egs/babel/s5/local/buildSRILM.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
targetDir=$1
diff --git a/egs/babel/s5/local/check_models.sh b/egs/babel/s5/local/check_models.sh
index d02fc4e561a..4389344ed85 100755
--- a/egs/babel/s5/local/check_models.sh
+++ b/egs/babel/s5/local/check_models.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
check_model () {
diff --git a/egs/babel/s5/local/check_wers.sh b/egs/babel/s5/local/check_wers.sh
index ebd6bb28790..1204fd459e7 100755
--- a/egs/babel/s5/local/check_wers.sh
+++ b/egs/babel/s5/local/check_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
diff --git a/egs/babel/s5/local/create_shadow_dataset.sh b/egs/babel/s5/local/create_shadow_dataset.sh
index d275b9aaca6..ce2215254e3 100755
--- a/egs/babel/s5/local/create_shadow_dataset.sh
+++ b/egs/babel/s5/local/create_shadow_dataset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University
# Apache 2.0.
diff --git a/egs/babel/s5/local/cstr_wsj_data_prep.sh b/egs/babel/s5/local/cstr_wsj_data_prep.sh
index 35582646d95..a5c327ec34c 100755
--- a/egs/babel/s5/local/cstr_wsj_data_prep.sh
+++ b/egs/babel/s5/local/cstr_wsj_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/babel/s5/local/cstr_wsj_extend_dict.sh b/egs/babel/s5/local/cstr_wsj_extend_dict.sh
index b2a9faad704..9447cd1249b 100755
--- a/egs/babel/s5/local/cstr_wsj_extend_dict.sh
+++ b/egs/babel/s5/local/cstr_wsj_extend_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script builds a larger word-list and dictionary
# than used for the LMs supplied with the WSJ corpus.
diff --git a/egs/babel/s5/local/generate_example_kws.sh b/egs/babel/s5/local/generate_example_kws.sh
index 2c849438192..ecba20efbf1 100755
--- a/egs/babel/s5/local/generate_example_kws.sh
+++ b/egs/babel/s5/local/generate_example_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/generate_proxy_keywords.sh b/egs/babel/s5/local/generate_proxy_keywords.sh
index fca2326b278..67111a75165 100755
--- a/egs/babel/s5/local/generate_proxy_keywords.sh
+++ b/egs/babel/s5/local/generate_proxy_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/get_syllable_text.sh b/egs/babel/s5/local/get_syllable_text.sh
index 97d2af7ed65..b98868c8b16 100755
--- a/egs/babel/s5/local/get_syllable_text.sh
+++ b/egs/babel/s5/local/get_syllable_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University 2013 (author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_data_prep.sh b/egs/babel/s5/local/kws_data_prep.sh
index 909e9b2596c..8b52ccc4d20 100755
--- a/egs/babel/s5/local/kws_data_prep.sh
+++ b/egs/babel/s5/local/kws_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_data_prep_syllables.sh b/egs/babel/s5/local/kws_data_prep_syllables.sh
index c6245e52c9e..221647e8d60 100755
--- a/egs/babel/s5/local/kws_data_prep_syllables.sh
+++ b/egs/babel/s5/local/kws_data_prep_syllables.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_gen_oracle_lattices.sh b/egs/babel/s5/local/kws_gen_oracle_lattices.sh
index aa9e22cca96..87a5b5df435 100755
--- a/egs/babel/s5/local/kws_gen_oracle_lattices.sh
+++ b/egs/babel/s5/local/kws_gen_oracle_lattices.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_oracle.sh b/egs/babel/s5/local/kws_oracle.sh
index 44334ba1413..2ba7b070546 100755
--- a/egs/babel/s5/local/kws_oracle.sh
+++ b/egs/babel/s5/local/kws_oracle.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
# 2013 Johns Hopkins University
diff --git a/egs/babel/s5/local/kws_score_f4de.sh b/egs/babel/s5/local/kws_score_f4de.sh
index 5501bbf84fe..fd194851f96 100755
--- a/egs/babel/s5/local/kws_score_f4de.sh
+++ b/egs/babel/s5/local/kws_score_f4de.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5/local/kws_search.sh b/egs/babel/s5/local/kws_search.sh
index 77fd983ebc1..2dc217fc1cd 100755
--- a/egs/babel/s5/local/kws_search.sh
+++ b/egs/babel/s5/local/kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
@@ -144,7 +144,7 @@ if [ $stage -le 2 ]; then
echo "Writing normalized results"
$cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_normalized.LMWT.log \
set -e ';' set -o pipefail ';'\
- cat ${kwsoutdir}_LMWT/result.* \| \
+ gunzip -c ${kwsoutdir}_LMWT/result.* \| \
utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \
--segments=$datadir/segments --normalize=true --duptime=$duptime --remove-dup=true\
--map-utter=$kwsdatadir/utter_map --digits=3 \
@@ -155,7 +155,7 @@ if [ $stage -le 3 ]; then
echo "Writing unnormalized results"
$cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_unnormalized.LMWT.log \
set -e ';' set -o pipefail ';'\
- cat ${kwsoutdir}_LMWT/result.* \| \
+ gunzip -c ${kwsoutdir}_LMWT/result.* \| \
utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \
--segments=$datadir/segments --normalize=false --duptime=$duptime --remove-dup=true\
--map-utter=$kwsdatadir/utter_map \
diff --git a/egs/babel/s5/local/kws_setup.sh b/egs/babel/s5/local/kws_setup.sh
index f49267c233c..c33bb6350ae 100755
--- a/egs/babel/s5/local/kws_setup.sh
+++ b/egs/babel/s5/local/kws_setup.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5/local/lattice_to_ctm.sh b/egs/babel/s5/local/lattice_to_ctm.sh
index 9bf1b3ca882..a3f0024e8bc 100755
--- a/egs/babel/s5/local/lattice_to_ctm.sh
+++ b/egs/babel/s5/local/lattice_to_ctm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5/local/lattice_to_ctm_syllable.sh b/egs/babel/s5/local/lattice_to_ctm_syllable.sh
index 7165a7a04e5..4a51ef50397 100755
--- a/egs/babel/s5/local/lattice_to_ctm_syllable.sh
+++ b/egs/babel/s5/local/lattice_to_ctm_syllable.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5/local/make_ecf_subset.sh b/egs/babel/s5/local/make_ecf_subset.sh
index be81cc21a5d..af8aac116bc 100755
--- a/egs/babel/s5/local/make_ecf_subset.sh
+++ b/egs/babel/s5/local/make_ecf_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5/local/make_ffv.sh b/egs/babel/s5/local/make_ffv.sh
index 3820e4cb659..8036d606c9b 100755
--- a/egs/babel/s5/local/make_ffv.sh
+++ b/egs/babel/s5/local/make_ffv.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
# Bagher BabaAli
@@ -151,7 +151,7 @@ for ((n=1; n<=nj; n++)); do
done
cat <<'EOF' > $ffv_script
-#!/bin/bash
+#!/usr/bin/env bash
# script for execution of ffv
flen=0.01
sfreq=8000
@@ -189,7 +189,7 @@ fi
# script file in the experimental directory. Quotes around 'EOF' disable any
# interpretation in the here-doc.
cat <<'EOF' > $expdir/convert.sh
-#!/bin/bash
+#!/usr/bin/env bash
ffv_flist=$1
scpfile=$2
[ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1;
diff --git a/egs/babel/s5/local/make_lexicon_subset.sh b/egs/babel/s5/local/make_lexicon_subset.sh
index 62464f3c461..82ce6a8af78 100755
--- a/egs/babel/s5/local/make_lexicon_subset.sh
+++ b/egs/babel/s5/local/make_lexicon_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "$0 $@" # Print the command line for logging
diff --git a/egs/babel/s5/local/make_pitch.sh b/egs/babel/s5/local/make_pitch.sh
index f3597f504dd..0758e414e24 100755
--- a/egs/babel/s5/local/make_pitch.sh
+++ b/egs/babel/s5/local/make_pitch.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
# Bagher BabaAli
@@ -158,7 +158,7 @@ fi
# script file in the experimental directory. Quotes around 'EOF' disable any
# interpretation in the here-doc.
cat <<'EOF' > $expdir/convert.sh
-#!/bin/bash
+#!/usr/bin/env bash
sacc_flist=$1
scpfile=$2
[ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1;
diff --git a/egs/babel/s5/local/make_syllable_lexicon.sh b/egs/babel/s5/local/make_syllable_lexicon.sh
index 118845982b9..e50e72929f1 100755
--- a/egs/babel/s5/local/make_syllable_lexicon.sh
+++ b/egs/babel/s5/local/make_syllable_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
help="Usage: $(basename $0)
diff --git a/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh b/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh
index 5ef283af54e..c35ae50c102 100755
--- a/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh
+++ b/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script was copied from ../10hSystem/local (Author: Guoguo Chen?)
# It will be modified to make it somewhat more reusable
diff --git a/egs/babel/s5/local/score_combine.sh b/egs/babel/s5/local/score_combine.sh
index 42d9cfc0918..d77879541d4 100755
--- a/egs/babel/s5/local/score_combine.sh
+++ b/egs/babel/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Arnab Ghoshal
# Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
diff --git a/egs/babel/s5/local/score_map.sh b/egs/babel/s5/local/score_map.sh
index 94c31acc348..c44cc19c94f 100755
--- a/egs/babel/s5/local/score_map.sh
+++ b/egs/babel/s5/local/score_map.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/babel/s5/local/score_mbr.sh b/egs/babel/s5/local/score_mbr.sh
index 1c39830b4c7..b99568668a1 100755
--- a/egs/babel/s5/local/score_mbr.sh
+++ b/egs/babel/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/babel/s5/local/score_sctk.sh b/egs/babel/s5/local/score_sctk.sh
index cef470421a0..b210c8c5ec5 100755
--- a/egs/babel/s5/local/score_sctk.sh
+++ b/egs/babel/s5/local/score_sctk.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur) 2012-2013. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5/local/score_sctk_prune.sh b/egs/babel/s5/local/score_sctk_prune.sh
index a6eca9fd071..5ad8e175fd9 100755
--- a/egs/babel/s5/local/score_sctk_prune.sh
+++ b/egs/babel/s5/local/score_sctk_prune.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur) 2012-2013. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5/local/score_stm.sh b/egs/babel/s5/local/score_stm.sh
index 6a43c718d3b..cecfa3f8d2b 100755
--- a/egs/babel/s5/local/score_stm.sh
+++ b/egs/babel/s5/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5/local/shadow_set_kws_search.sh b/egs/babel/s5/local/shadow_set_kws_search.sh
index 733a84d4acf..6d4498fb5d9 100755
--- a/egs/babel/s5/local/shadow_set_kws_search.sh
+++ b/egs/babel/s5/local/shadow_set_kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5/local/train_lms_srilm.sh b/egs/babel/s5/local/train_lms_srilm.sh
index e35a4322364..814b23cd175 100755
--- a/egs/babel/s5/local/train_lms_srilm.sh
+++ b/egs/babel/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/babel/s5/local/train_mmi_sgmm2.sh b/egs/babel/s5/local/train_mmi_sgmm2.sh
index 2d3d0b5bf49..e8cc4132f5c 100755
--- a/egs/babel/s5/local/train_mmi_sgmm2.sh
+++ b/egs/babel/s5/local/train_mmi_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# MMI training (or optionally boosted MMI, if you give the --boost option),
diff --git a/egs/babel/s5/make_release.sh b/egs/babel/s5/make_release.sh
index 56fdc068442..7dbc0f275a6 100755
--- a/egs/babel/s5/make_release.sh
+++ b/egs/babel/s5/make_release.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
lp=
lr=
diff --git a/egs/babel/s5/run-1-main.sh b/egs/babel/s5/run-1-main.sh
index a156661c1f3..3ed870a5977 100755
--- a/egs/babel/s5/run-1-main.sh
+++ b/egs/babel/s5/run-1-main.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5/run-2a-nnet.sh b/egs/babel/s5/run-2a-nnet.sh
index 00a3b44fe2e..54d6c343b2b 100755
--- a/egs/babel/s5/run-2a-nnet.sh
+++ b/egs/babel/s5/run-2a-nnet.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
diff --git a/egs/babel/s5/run-2b-bnf.sh b/egs/babel/s5/run-2b-bnf.sh
index 1176834ce70..b3de0b014d1 100755
--- a/egs/babel/s5/run-2b-bnf.sh
+++ b/egs/babel/s5/run-2b-bnf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the "final" version of the script that runs trains the bottleneck system.
# It is to be run after run.sh (the new version, that uses the same number of phases
diff --git a/egs/babel/s5/run-6-combine.sh b/egs/babel/s5/run-6-combine.sh
index 92d749ca486..2cbec7a9816 100755
--- a/egs/babel/s5/run-6-combine.sh
+++ b/egs/babel/s5/run-6-combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
diff --git a/egs/babel/s5/steps_BNF/build_nnet_pfile.sh b/egs/babel/s5/steps_BNF/build_nnet_pfile.sh
index 20e9754bea7..99b373d346a 100755
--- a/egs/babel/s5/steps_BNF/build_nnet_pfile.sh
+++ b/egs/babel/s5/steps_BNF/build_nnet_pfile.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Carnegie Mellon University (Author: Yajie Miao)
# Apache 2.0
diff --git a/egs/babel/s5/steps_BNF/make_bnf_feat.sh b/egs/babel/s5/steps_BNF/make_bnf_feat.sh
index 52f49475076..691d53d2c2b 100755
--- a/egs/babel/s5/steps_BNF/make_bnf_feat.sh
+++ b/egs/babel/s5/steps_BNF/make_bnf_feat.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Carnegie Mellon University (Author: Yajie Miao)
# Apache 2.0
diff --git a/egs/babel/s5/steps_BNF/make_denlats_sgmm2.sh b/egs/babel/s5/steps_BNF/make_denlats_sgmm2.sh
index c3d92561089..7402e8499e7 100755
--- a/egs/babel/s5/steps_BNF/make_denlats_sgmm2.sh
+++ b/egs/babel/s5/steps_BNF/make_denlats_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Carnegie Mellon University (Author: Yajie Miao)
diff --git a/egs/babel/s5b/local/CHECKPOINT.sh b/egs/babel/s5b/local/CHECKPOINT.sh
index 91b64d7fe1a..b8bdc48a9aa 100755
--- a/egs/babel/s5b/local/CHECKPOINT.sh
+++ b/egs/babel/s5b/local/CHECKPOINT.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
function GETAPPROVAL {
until false ; do
diff --git a/egs/babel/s5b/local/apply_g2p.sh b/egs/babel/s5b/local/apply_g2p.sh
index f47274cb21c..2342b081893 100755
--- a/egs/babel/s5b/local/apply_g2p.sh
+++ b/egs/babel/s5b/local/apply_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/arpa2G.sh b/egs/babel/s5b/local/arpa2G.sh
index db816abc7a5..7d427314dd6 100755
--- a/egs/babel/s5b/local/arpa2G.sh
+++ b/egs/babel/s5b/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5b/local/best_path_weights.sh b/egs/babel/s5b/local/best_path_weights.sh
index 8e88a3610a4..4ec5dc47a67 100755
--- a/egs/babel/s5b/local/best_path_weights.sh
+++ b/egs/babel/s5b/local/best_path_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5b/local/buildSRILM.sh b/egs/babel/s5b/local/buildSRILM.sh
index 0633789a6a9..f113c322444 100755
--- a/egs/babel/s5b/local/buildSRILM.sh
+++ b/egs/babel/s5b/local/buildSRILM.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
targetDir=$1
diff --git a/egs/babel/s5b/local/check_models.sh b/egs/babel/s5b/local/check_models.sh
index d02fc4e561a..4389344ed85 100755
--- a/egs/babel/s5b/local/check_models.sh
+++ b/egs/babel/s5b/local/check_models.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
check_model () {
diff --git a/egs/babel/s5b/local/check_wers.sh b/egs/babel/s5b/local/check_wers.sh
index ebd6bb28790..1204fd459e7 100755
--- a/egs/babel/s5b/local/check_wers.sh
+++ b/egs/babel/s5b/local/check_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
diff --git a/egs/babel/s5b/local/create_shadow_dataset.sh b/egs/babel/s5b/local/create_shadow_dataset.sh
index 6783ee49770..2b65392b096 100755
--- a/egs/babel/s5b/local/create_shadow_dataset.sh
+++ b/egs/babel/s5b/local/create_shadow_dataset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University
# Apache 2.0.
diff --git a/egs/babel/s5b/local/extend_lexicon.sh b/egs/babel/s5b/local/extend_lexicon.sh
index 2250d4f5dcf..3d2724609a0 100755
--- a/egs/babel/s5b/local/extend_lexicon.sh
+++ b/egs/babel/s5b/local/extend_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (authors: Daniel Povey, Yenda Trmal)
# 2014 Guoguo Chen
diff --git a/egs/babel/s5b/local/generate_confusion_matrix.sh b/egs/babel/s5b/local/generate_confusion_matrix.sh
index 6529057db9e..b52a6a2b271 100755
--- a/egs/babel/s5b/local/generate_confusion_matrix.sh
+++ b/egs/babel/s5b/local/generate_confusion_matrix.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/generate_example_kws.sh b/egs/babel/s5b/local/generate_example_kws.sh
index 2c849438192..ecba20efbf1 100755
--- a/egs/babel/s5b/local/generate_example_kws.sh
+++ b/egs/babel/s5b/local/generate_example_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/generate_proxy_keywords.sh b/egs/babel/s5b/local/generate_proxy_keywords.sh
index 8562953efa4..324044a604a 100755
--- a/egs/babel/s5b/local/generate_proxy_keywords.sh
+++ b/egs/babel/s5b/local/generate_proxy_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Guoguo Chen
# Apache 2.0.
diff --git a/egs/babel/s5b/local/get_syllable_text.sh b/egs/babel/s5b/local/get_syllable_text.sh
index 97d2af7ed65..b98868c8b16 100755
--- a/egs/babel/s5b/local/get_syllable_text.sh
+++ b/egs/babel/s5b/local/get_syllable_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University 2013 (author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_combine.sh b/egs/babel/s5b/local/kws_combine.sh
index 32ec93fa49e..7f55f798aaa 100755
--- a/egs/babel/s5b/local/kws_combine.sh
+++ b/egs/babel/s5b/local/kws_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
diff --git a/egs/babel/s5b/local/kws_data_prep.sh b/egs/babel/s5b/local/kws_data_prep.sh
index 909e9b2596c..8b52ccc4d20 100755
--- a/egs/babel/s5b/local/kws_data_prep.sh
+++ b/egs/babel/s5b/local/kws_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_data_prep_proxy.sh b/egs/babel/s5b/local/kws_data_prep_proxy.sh
index 787cb009960..5a640dcc890 100755
--- a/egs/babel/s5b/local/kws_data_prep_proxy.sh
+++ b/egs/babel/s5b/local/kws_data_prep_proxy.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Guoguo Chen
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_data_prep_syllables.sh b/egs/babel/s5b/local/kws_data_prep_syllables.sh
index c6245e52c9e..221647e8d60 100755
--- a/egs/babel/s5b/local/kws_data_prep_syllables.sh
+++ b/egs/babel/s5b/local/kws_data_prep_syllables.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_gen_oracle_lattices.sh b/egs/babel/s5b/local/kws_gen_oracle_lattices.sh
index aa9e22cca96..87a5b5df435 100755
--- a/egs/babel/s5b/local/kws_gen_oracle_lattices.sh
+++ b/egs/babel/s5b/local/kws_gen_oracle_lattices.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_oracle.sh b/egs/babel/s5b/local/kws_oracle.sh
index 44334ba1413..2ba7b070546 100755
--- a/egs/babel/s5b/local/kws_oracle.sh
+++ b/egs/babel/s5b/local/kws_oracle.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
# 2013 Johns Hopkins University
diff --git a/egs/babel/s5b/local/kws_score_f4de.sh b/egs/babel/s5b/local/kws_score_f4de.sh
index d761e080c1c..ffb77e336b0 100755
--- a/egs/babel/s5b/local/kws_score_f4de.sh
+++ b/egs/babel/s5b/local/kws_score_f4de.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_search.sh b/egs/babel/s5b/local/kws_search.sh
index 4b275048e0e..359cd5c74d0 100755
--- a/egs/babel/s5b/local/kws_search.sh
+++ b/egs/babel/s5b/local/kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/kws_setup.sh b/egs/babel/s5b/local/kws_setup.sh
index f1036f100de..dea9d605e21 100755
--- a/egs/babel/s5b/local/kws_setup.sh
+++ b/egs/babel/s5b/local/kws_setup.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/lattice_to_ctm.sh b/egs/babel/s5b/local/lattice_to_ctm.sh
index 08a1b5889a7..5169b275195 100755
--- a/egs/babel/s5b/local/lattice_to_ctm.sh
+++ b/egs/babel/s5b/local/lattice_to_ctm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5b/local/lattice_to_ctm_syllable.sh b/egs/babel/s5b/local/lattice_to_ctm_syllable.sh
index 7165a7a04e5..4a51ef50397 100755
--- a/egs/babel/s5b/local/lattice_to_ctm_syllable.sh
+++ b/egs/babel/s5b/local/lattice_to_ctm_syllable.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5b/local/make_corpus_subset.sh b/egs/babel/s5b/local/make_corpus_subset.sh
index add194d48e8..acd5e91a18b 100755
--- a/egs/babel/s5b/local/make_corpus_subset.sh
+++ b/egs/babel/s5b/local/make_corpus_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/make_ecf_subset.sh b/egs/babel/s5b/local/make_ecf_subset.sh
index 53bddcbc839..bc776d8446e 100755
--- a/egs/babel/s5b/local/make_ecf_subset.sh
+++ b/egs/babel/s5b/local/make_ecf_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/make_lexicon_subset.sh b/egs/babel/s5b/local/make_lexicon_subset.sh
index 924a22866e7..c66e1164ae7 100755
--- a/egs/babel/s5b/local/make_lexicon_subset.sh
+++ b/egs/babel/s5b/local/make_lexicon_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "$0 $@" # Print the command line for logging
diff --git a/egs/babel/s5b/local/make_syllable_lexicon.sh b/egs/babel/s5b/local/make_syllable_lexicon.sh
index 118845982b9..e50e72929f1 100755
--- a/egs/babel/s5b/local/make_syllable_lexicon.sh
+++ b/egs/babel/s5b/local/make_syllable_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
help="Usage: $(basename $0)
diff --git a/egs/babel/s5b/local/nist_eval/create_compound_set.sh b/egs/babel/s5b/local/nist_eval/create_compound_set.sh
index 3b35ad11e29..5582536b093 100755
--- a/egs/babel/s5b/local/nist_eval/create_compound_set.sh
+++ b/egs/babel/s5b/local/nist_eval/create_compound_set.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#Simple script to create compound set info that will allow for more automatized
#work with the shadow set.
diff --git a/egs/babel/s5b/local/nist_eval/export_systems.sh b/egs/babel/s5b/local/nist_eval/export_systems.sh
index 7e514bcc077..7ba72662f6b 100755
--- a/egs/babel/s5b/local/nist_eval/export_systems.sh
+++ b/egs/babel/s5b/local/nist_eval/export_systems.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel/s5b/local/nist_eval/make_release.sh b/egs/babel/s5b/local/nist_eval/make_release.sh
index bb74188b4d9..e32ad82d341 100755
--- a/egs/babel/s5b/local/nist_eval/make_release.sh
+++ b/egs/babel/s5b/local/nist_eval/make_release.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
team=RADICAL
corpusid=
diff --git a/egs/babel/s5b/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5b/local/nnet2/get_egs_semi_supervised.sh
index 760d7ee80d5..7b16ebf8e3b 100755
--- a/egs/babel/s5b/local/nnet2/get_egs_semi_supervised.sh
+++ b/egs/babel/s5b/local/nnet2/get_egs_semi_supervised.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5b/local/prepare_kaldi_lm_from_training_text.sh b/egs/babel/s5b/local/prepare_kaldi_lm_from_training_text.sh
index 6566860a5d6..9d48391cc38 100755
--- a/egs/babel/s5b/local/prepare_kaldi_lm_from_training_text.sh
+++ b/egs/babel/s5b/local/prepare_kaldi_lm_from_training_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script was copied from ../10hSystem/local (Author: Guoguo Chen?)
# It will be modified to make it somewhat more reusable
diff --git a/egs/babel/s5b/local/resegment/generate_segments.sh b/egs/babel/s5b/local/resegment/generate_segments.sh
index 01917c3d4e9..1ca8bd9aa06 100755
--- a/egs/babel/s5b/local/resegment/generate_segments.sh
+++ b/egs/babel/s5b/local/resegment/generate_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/resegment/train_segmentation.sh b/egs/babel/s5b/local/resegment/train_segmentation.sh
index 511c451993e..264a1cc3c88 100755
--- a/egs/babel/s5b/local/resegment/train_segmentation.sh
+++ b/egs/babel/s5b/local/resegment/train_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/score_combine.sh b/egs/babel/s5b/local/score_combine.sh
index f425b5afc68..b6c3b613509 100755
--- a/egs/babel/s5b/local/score_combine.sh
+++ b/egs/babel/s5b/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Arnab Ghoshal
# Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
diff --git a/egs/babel/s5b/local/score_map.sh b/egs/babel/s5b/local/score_map.sh
index 94c31acc348..c44cc19c94f 100755
--- a/egs/babel/s5b/local/score_map.sh
+++ b/egs/babel/s5b/local/score_map.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/babel/s5b/local/score_mbr.sh b/egs/babel/s5b/local/score_mbr.sh
index 1c39830b4c7..b99568668a1 100755
--- a/egs/babel/s5b/local/score_mbr.sh
+++ b/egs/babel/s5b/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/babel/s5b/local/score_sctk_prune.sh b/egs/babel/s5b/local/score_sctk_prune.sh
index a6eca9fd071..5ad8e175fd9 100755
--- a/egs/babel/s5b/local/score_sctk_prune.sh
+++ b/egs/babel/s5b/local/score_sctk_prune.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur) 2012-2013. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5b/local/score_stm.sh b/egs/babel/s5b/local/score_stm.sh
index 2406af4e726..40c082f050f 100755
--- a/egs/babel/s5b/local/score_stm.sh
+++ b/egs/babel/s5b/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5b/local/shadow_set_kws_search.sh b/egs/babel/s5b/local/shadow_set_kws_search.sh
index 76521fda9b6..d8b62418cdf 100755
--- a/egs/babel/s5b/local/shadow_set_kws_search.sh
+++ b/egs/babel/s5b/local/shadow_set_kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5b/local/show_lattice.sh b/egs/babel/s5b/local/show_lattice.sh
index 0865d0d1225..df13d3091f3 100755
--- a/egs/babel/s5b/local/show_lattice.sh
+++ b/egs/babel/s5b/local/show_lattice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh
diff --git a/egs/babel/s5b/local/train_g2p.sh b/egs/babel/s5b/local/train_g2p.sh
index 385c474abad..d26617025b8 100755
--- a/egs/babel/s5b/local/train_g2p.sh
+++ b/egs/babel/s5b/local/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/local/train_lms_srilm.sh b/egs/babel/s5b/local/train_lms_srilm.sh
index 5bb1bfaa760..f9f13bb344b 100755
--- a/egs/babel/s5b/local/train_lms_srilm.sh
+++ b/egs/babel/s5b/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/babel/s5b/local/train_mmi_sgmm2.sh b/egs/babel/s5b/local/train_mmi_sgmm2.sh
index 2d3d0b5bf49..e8cc4132f5c 100755
--- a/egs/babel/s5b/local/train_mmi_sgmm2.sh
+++ b/egs/babel/s5b/local/train_mmi_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# MMI training (or optionally boosted MMI, if you give the --boost option),
diff --git a/egs/babel/s5b/run-1-main-extend-lex.sh b/egs/babel/s5b/run-1-main-extend-lex.sh
index ccc62441c1f..3bb077490c9 100755
--- a/egs/babel/s5b/run-1-main-extend-lex.sh
+++ b/egs/babel/s5b/run-1-main-extend-lex.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Parameters for extended lexicon.
extend_lexicon=true
diff --git a/egs/babel/s5b/run-1-main.sh b/egs/babel/s5b/run-1-main.sh
index 6f2e8d444be..dc4983c032e 100755
--- a/egs/babel/s5b/run-1-main.sh
+++ b/egs/babel/s5b/run-1-main.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5b/run-2-segmentation.sh b/egs/babel/s5b/run-2-segmentation.sh
index d832a9421c8..f7651c2ae91 100755
--- a/egs/babel/s5b/run-2-segmentation.sh
+++ b/egs/babel/s5b/run-2-segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5b/run-2a-nnet-cpu.sh b/egs/babel/s5b/run-2a-nnet-cpu.sh
index 35e7d3ceab3..0f246d9e1aa 100755
--- a/egs/babel/s5b/run-2a-nnet-cpu.sh
+++ b/egs/babel/s5b/run-2a-nnet-cpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5b/run-2a-nnet-ensemble-gpu.sh b/egs/babel/s5b/run-2a-nnet-ensemble-gpu.sh
index 06c9a330295..953ee4baef5 100755
--- a/egs/babel/s5b/run-2a-nnet-ensemble-gpu.sh
+++ b/egs/babel/s5b/run-2a-nnet-ensemble-gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5b/run-2a-nnet-gpu-realign.sh b/egs/babel/s5b/run-2a-nnet-gpu-realign.sh
index 4652789fb2d..963e0c4a422 100755
--- a/egs/babel/s5b/run-2a-nnet-gpu-realign.sh
+++ b/egs/babel/s5b/run-2a-nnet-gpu-realign.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
dir=exp/tri6_nnet
train_stage=-10
realign_epochs="6 10"
diff --git a/egs/babel/s5b/run-2a-nnet-gpu.sh b/egs/babel/s5b/run-2a-nnet-gpu.sh
index 87faa58f733..76dd415667b 100755
--- a/egs/babel/s5b/run-2a-nnet-gpu.sh
+++ b/egs/babel/s5b/run-2a-nnet-gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
dir=exp/tri6_nnet
train_stage=-10
diff --git a/egs/babel/s5b/run-2a-nnet-mpe.sh b/egs/babel/s5b/run-2a-nnet-mpe.sh
index 457d85e4a48..49f1b11a493 100755
--- a/egs/babel/s5b/run-2a-nnet-mpe.sh
+++ b/egs/babel/s5b/run-2a-nnet-mpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5b/run-2b-bnf.sh b/egs/babel/s5b/run-2b-bnf.sh
index 505e647cf93..a30a73ebdb8 100755
--- a/egs/babel/s5b/run-2b-bnf.sh
+++ b/egs/babel/s5b/run-2b-bnf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# Apache 2.0
diff --git a/egs/babel/s5b/run-3b-bnf-nnet.sh b/egs/babel/s5b/run-3b-bnf-nnet.sh
index 169eec6f62f..fff73266305 100755
--- a/egs/babel/s5b/run-3b-bnf-nnet.sh
+++ b/egs/babel/s5b/run-3b-bnf-nnet.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# 2014 Johns Hopkins (Yenda Trmal)
diff --git a/egs/babel/s5b/run-3b-bnf-sgmm.sh b/egs/babel/s5b/run-3b-bnf-sgmm.sh
index 81e4ae538b4..96fcd396893 100755
--- a/egs/babel/s5b/run-3b-bnf-sgmm.sh
+++ b/egs/babel/s5b/run-3b-bnf-sgmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# 2014 Johns Hopkins (Yenda Trmal)
diff --git a/egs/babel/s5b/run-6-combine.sh b/egs/babel/s5b/run-6-combine.sh
index 7998ade9cfd..9ee8b6274f8 100755
--- a/egs/babel/s5b/run-6-combine.sh
+++ b/egs/babel/s5b/run-6-combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
source_sys=shadow.seg
master_sys=dev10h.seg
diff --git a/egs/babel/s5c/local/CHECKPOINT.sh b/egs/babel/s5c/local/CHECKPOINT.sh
index ed0ddd18399..b7897a67ad6 100755
--- a/egs/babel/s5c/local/CHECKPOINT.sh
+++ b/egs/babel/s5c/local/CHECKPOINT.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
function GETAPPROVAL {
until false ; do
diff --git a/egs/babel/s5c/local/ali_to_rttm.sh b/egs/babel/s5c/local/ali_to_rttm.sh
index ef11f516ea3..96cf92b8dc8 100755
--- a/egs/babel/s5c/local/ali_to_rttm.sh
+++ b/egs/babel/s5c/local/ali_to_rttm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/apply_g2p.sh b/egs/babel/s5c/local/apply_g2p.sh
index 385b1f3536e..8807a93b762 100755
--- a/egs/babel/s5c/local/apply_g2p.sh
+++ b/egs/babel/s5c/local/apply_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/arpa2G.sh b/egs/babel/s5c/local/arpa2G.sh
index db816abc7a5..7d427314dd6 100755
--- a/egs/babel/s5c/local/arpa2G.sh
+++ b/egs/babel/s5c/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/best_path_weights.sh b/egs/babel/s5c/local/best_path_weights.sh
index 52782ee3655..5a308a13f85 100755
--- a/egs/babel/s5c/local/best_path_weights.sh
+++ b/egs/babel/s5c/local/best_path_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5c/local/buildSRILM.sh b/egs/babel/s5c/local/buildSRILM.sh
index 0633789a6a9..f113c322444 100755
--- a/egs/babel/s5c/local/buildSRILM.sh
+++ b/egs/babel/s5c/local/buildSRILM.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
targetDir=$1
diff --git a/egs/babel/s5c/local/check_models.sh b/egs/babel/s5c/local/check_models.sh
index 88b3dacc94b..a8ed6247bd4 100755
--- a/egs/babel/s5c/local/check_models.sh
+++ b/egs/babel/s5c/local/check_models.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
check_model () {
diff --git a/egs/babel/s5c/local/check_wers.sh b/egs/babel/s5c/local/check_wers.sh
index 10e1a89ee3a..f3d2483213b 100755
--- a/egs/babel/s5c/local/check_wers.sh
+++ b/egs/babel/s5c/local/check_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
diff --git a/egs/babel/s5c/local/create_shadow_dataset.sh b/egs/babel/s5c/local/create_shadow_dataset.sh
index 49467ed28c1..7e1c675e539 100755
--- a/egs/babel/s5c/local/create_shadow_dataset.sh
+++ b/egs/babel/s5c/local/create_shadow_dataset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University
# Apache 2.0.
diff --git a/egs/babel/s5c/local/extend_lexicon.sh b/egs/babel/s5c/local/extend_lexicon.sh
index 74a0c6a5569..58f1ebc145e 100755
--- a/egs/babel/s5c/local/extend_lexicon.sh
+++ b/egs/babel/s5c/local/extend_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (authors: Daniel Povey, Yenda Trmal)
# 2014 Guoguo Chen
diff --git a/egs/babel/s5c/local/generate_confusion_matrix.sh b/egs/babel/s5c/local/generate_confusion_matrix.sh
index 6296d7486ce..7a3b059efba 100755
--- a/egs/babel/s5c/local/generate_confusion_matrix.sh
+++ b/egs/babel/s5c/local/generate_confusion_matrix.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/generate_example_kws.sh b/egs/babel/s5c/local/generate_example_kws.sh
index e90752926b3..fca383c64af 100755
--- a/egs/babel/s5c/local/generate_example_kws.sh
+++ b/egs/babel/s5c/local/generate_example_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/generate_proxy_keywords.sh b/egs/babel/s5c/local/generate_proxy_keywords.sh
index 584f7d7902e..67ec24b931f 100755
--- a/egs/babel/s5c/local/generate_proxy_keywords.sh
+++ b/egs/babel/s5c/local/generate_proxy_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Guoguo Chen
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_combine.sh b/egs/babel/s5c/local/kws_combine.sh
index f795c63aad9..35d42599b79 100755
--- a/egs/babel/s5c/local/kws_combine.sh
+++ b/egs/babel/s5c/local/kws_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
diff --git a/egs/babel/s5c/local/kws_data_prep.sh b/egs/babel/s5c/local/kws_data_prep.sh
index 3882c99ce6d..442a49eb059 100755
--- a/egs/babel/s5c/local/kws_data_prep.sh
+++ b/egs/babel/s5c/local/kws_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_data_prep_proxy.sh b/egs/babel/s5c/local/kws_data_prep_proxy.sh
index 04cc59b6499..42e330116c9 100755
--- a/egs/babel/s5c/local/kws_data_prep_proxy.sh
+++ b/egs/babel/s5c/local/kws_data_prep_proxy.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Guoguo Chen
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh
index b73112b191d..a1bc1ec7048 100755
--- a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh
+++ b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_oracle.sh b/egs/babel/s5c/local/kws_oracle.sh
index c7aa661664f..2b10c721961 100755
--- a/egs/babel/s5c/local/kws_oracle.sh
+++ b/egs/babel/s5c/local/kws_oracle.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
# 2013 Johns Hopkins University
diff --git a/egs/babel/s5c/local/kws_score_f4de.sh b/egs/babel/s5c/local/kws_score_f4de.sh
index cd6948a8a08..e5acfbd9ee8 100755
--- a/egs/babel/s5c/local/kws_score_f4de.sh
+++ b/egs/babel/s5c/local/kws_score_f4de.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_search.sh b/egs/babel/s5c/local/kws_search.sh
index 9e998d6c3f9..c2ee7671508 100755
--- a/egs/babel/s5c/local/kws_search.sh
+++ b/egs/babel/s5c/local/kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/kws_setup.sh b/egs/babel/s5c/local/kws_setup.sh
index a6b87ef004f..6a77953f181 100755
--- a/egs/babel/s5c/local/kws_setup.sh
+++ b/egs/babel/s5c/local/kws_setup.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/lattice_to_ctm.sh b/egs/babel/s5c/local/lattice_to_ctm.sh
index 5fbde42d237..d23c4f17abf 100755
--- a/egs/babel/s5c/local/lattice_to_ctm.sh
+++ b/egs/babel/s5c/local/lattice_to_ctm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5c/local/make_L_align.sh b/egs/babel/s5c/local/make_L_align.sh
index 50e46a00493..9d9d48c6530 100755
--- a/egs/babel/s5c/local/make_L_align.sh
+++ b/egs/babel/s5c/local/make_L_align.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Guoguo Chen, Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/make_corpus_subset.sh b/egs/babel/s5c/local/make_corpus_subset.sh
index add194d48e8..acd5e91a18b 100755
--- a/egs/babel/s5c/local/make_corpus_subset.sh
+++ b/egs/babel/s5c/local/make_corpus_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/make_ecf_subset.sh b/egs/babel/s5c/local/make_ecf_subset.sh
index 9bdd95c3e27..9fe8df841b7 100755
--- a/egs/babel/s5c/local/make_ecf_subset.sh
+++ b/egs/babel/s5c/local/make_ecf_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/make_lexicon_subset.sh b/egs/babel/s5c/local/make_lexicon_subset.sh
index bf2ebe45f7d..b6f19c088e9 100755
--- a/egs/babel/s5c/local/make_lexicon_subset.sh
+++ b/egs/babel/s5c/local/make_lexicon_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "$0 $@" # Print the command line for logging
diff --git a/egs/babel/s5c/local/nist_eval/create_compound_set.sh b/egs/babel/s5c/local/nist_eval/create_compound_set.sh
index ae5492a9f9e..7450396aa89 100755
--- a/egs/babel/s5c/local/nist_eval/create_compound_set.sh
+++ b/egs/babel/s5c/local/nist_eval/create_compound_set.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#Simple script to create compound set info that will allow for more automatized
#work with the shadow set.
diff --git a/egs/babel/s5c/local/nist_eval/export_systems.sh b/egs/babel/s5c/local/nist_eval/export_systems.sh
index d0af608416c..f0984c703ab 100755
--- a/egs/babel/s5c/local/nist_eval/export_systems.sh
+++ b/egs/babel/s5c/local/nist_eval/export_systems.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel/s5c/local/nist_eval/make_release.sh b/egs/babel/s5c/local/nist_eval/make_release.sh
index 179d5cbe619..0e4e1f2cb15 100755
--- a/egs/babel/s5c/local/nist_eval/make_release.sh
+++ b/egs/babel/s5c/local/nist_eval/make_release.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
team=RADICAL
corpusid=
diff --git a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh
index 3b12222e13a..968b8b36c1e 100755
--- a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh
+++ b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5c/local/prepare_kaldi_lm_from_training_text.sh b/egs/babel/s5c/local/prepare_kaldi_lm_from_training_text.sh
index 6566860a5d6..9d48391cc38 100755
--- a/egs/babel/s5c/local/prepare_kaldi_lm_from_training_text.sh
+++ b/egs/babel/s5c/local/prepare_kaldi_lm_from_training_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script was copied from ../10hSystem/local (Author: Guoguo Chen?)
# It will be modified to make it somewhat more reusable
diff --git a/egs/babel/s5c/local/resegment/generate_segments.sh b/egs/babel/s5c/local/resegment/generate_segments.sh
index 95e88deb87d..33eb5b36848 100755
--- a/egs/babel/s5c/local/resegment/generate_segments.sh
+++ b/egs/babel/s5c/local/resegment/generate_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/resegment/train_segmentation.sh b/egs/babel/s5c/local/resegment/train_segmentation.sh
index 511c451993e..264a1cc3c88 100755
--- a/egs/babel/s5c/local/resegment/train_segmentation.sh
+++ b/egs/babel/s5c/local/resegment/train_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/run_kws_stt_task.sh b/egs/babel/s5c/local/run_kws_stt_task.sh
index d622aac9442..d21aeabc2e3 100755
--- a/egs/babel/s5c/local/run_kws_stt_task.sh
+++ b/egs/babel/s5c/local/run_kws_stt_task.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/score_combine.sh b/egs/babel/s5c/local/score_combine.sh
index 7e8af85b2d8..eb9f8412089 100755
--- a/egs/babel/s5c/local/score_combine.sh
+++ b/egs/babel/s5c/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Arnab Ghoshal
# Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
diff --git a/egs/babel/s5c/local/score_map.sh b/egs/babel/s5c/local/score_map.sh
index 94c31acc348..c44cc19c94f 100755
--- a/egs/babel/s5c/local/score_map.sh
+++ b/egs/babel/s5c/local/score_map.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/babel/s5c/local/score_mbr.sh b/egs/babel/s5c/local/score_mbr.sh
index a86dd5c3f71..66b825f8d1d 100755
--- a/egs/babel/s5c/local/score_mbr.sh
+++ b/egs/babel/s5c/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/babel/s5c/local/score_sctk_prune.sh b/egs/babel/s5c/local/score_sctk_prune.sh
index 09662af57c8..b7787950744 100755
--- a/egs/babel/s5c/local/score_sctk_prune.sh
+++ b/egs/babel/s5c/local/score_sctk_prune.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur) 2012-2013. Apache 2.0.
# begin configuration section.
diff --git a/egs/babel/s5c/local/score_stm.sh b/egs/babel/s5c/local/score_stm.sh
index 56835109722..b86d83570e2 100755
--- a/egs/babel/s5c/local/score_stm.sh
+++ b/egs/babel/s5c/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/shadow_set_kws_search.sh b/egs/babel/s5c/local/shadow_set_kws_search.sh
index a67a3a57f6a..b2765928cbc 100755
--- a/egs/babel/s5c/local/shadow_set_kws_search.sh
+++ b/egs/babel/s5c/local/shadow_set_kws_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5c/local/show_lattice.sh b/egs/babel/s5c/local/show_lattice.sh
index 3435fcb8c41..3373b66f666 100755
--- a/egs/babel/s5c/local/show_lattice.sh
+++ b/egs/babel/s5c/local/show_lattice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh
diff --git a/egs/babel/s5c/local/split_ctms.sh b/egs/babel/s5c/local/split_ctms.sh
index b24a1380111..84d8811fc7b 100755
--- a/egs/babel/s5c/local/split_ctms.sh
+++ b/egs/babel/s5c/local/split_ctms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/syllab/ali_to_syllabs.sh b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh
index 8f0cb88771a..84b7d24eeb3 100755
--- a/egs/babel/s5c/local/syllab/ali_to_syllabs.sh
+++ b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5c/local/syllab/generate_syllable_lang.sh b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh
index 4a0810b9415..26486a08487 100755
--- a/egs/babel/s5c/local/syllab/generate_syllable_lang.sh
+++ b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5c/local/train_g2p.sh b/egs/babel/s5c/local/train_g2p.sh
index 08be0014656..5a0594d8f46 100755
--- a/egs/babel/s5c/local/train_g2p.sh
+++ b/egs/babel/s5c/local/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/local/train_lms_srilm.sh b/egs/babel/s5c/local/train_lms_srilm.sh
index be2b0247aeb..ea8e5840be5 100755
--- a/egs/babel/s5c/local/train_lms_srilm.sh
+++ b/egs/babel/s5c/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/babel/s5c/local/train_mmi_sgmm2.sh b/egs/babel/s5c/local/train_mmi_sgmm2.sh
index cdf9e28b1bf..210269bf23e 100755
--- a/egs/babel/s5c/local/train_mmi_sgmm2.sh
+++ b/egs/babel/s5c/local/train_mmi_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# MMI training (or optionally boosted MMI, if you give the --boost option),
diff --git a/egs/babel/s5c/run-1-main-extend-lex.sh b/egs/babel/s5c/run-1-main-extend-lex.sh
index ccc62441c1f..3bb077490c9 100755
--- a/egs/babel/s5c/run-1-main-extend-lex.sh
+++ b/egs/babel/s5c/run-1-main-extend-lex.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Parameters for extended lexicon.
extend_lexicon=true
diff --git a/egs/babel/s5c/run-1-main.sh b/egs/babel/s5c/run-1-main.sh
index 61c875fc84e..f5741d4ce8a 100755
--- a/egs/babel/s5c/run-1-main.sh
+++ b/egs/babel/s5c/run-1-main.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5c/run-2-segmentation.sh b/egs/babel/s5c/run-2-segmentation.sh
index d832a9421c8..f7651c2ae91 100755
--- a/egs/babel/s5c/run-2-segmentation.sh
+++ b/egs/babel/s5c/run-2-segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5c/run-2a-nnet-cpu.sh b/egs/babel/s5c/run-2a-nnet-cpu.sh
index 35e7d3ceab3..0f246d9e1aa 100755
--- a/egs/babel/s5c/run-2a-nnet-cpu.sh
+++ b/egs/babel/s5c/run-2a-nnet-cpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5c/run-2a-nnet-ensemble-gpu.sh b/egs/babel/s5c/run-2a-nnet-ensemble-gpu.sh
index 06c9a330295..953ee4baef5 100755
--- a/egs/babel/s5c/run-2a-nnet-ensemble-gpu.sh
+++ b/egs/babel/s5c/run-2a-nnet-ensemble-gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5c/run-2a-nnet-gpu.sh b/egs/babel/s5c/run-2a-nnet-gpu.sh
index 87faa58f733..76dd415667b 100755
--- a/egs/babel/s5c/run-2a-nnet-gpu.sh
+++ b/egs/babel/s5c/run-2a-nnet-gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
dir=exp/tri6_nnet
train_stage=-10
diff --git a/egs/babel/s5c/run-2a-nnet-mpe.sh b/egs/babel/s5c/run-2a-nnet-mpe.sh
index 457d85e4a48..49f1b11a493 100755
--- a/egs/babel/s5c/run-2a-nnet-mpe.sh
+++ b/egs/babel/s5c/run-2a-nnet-mpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
. ./lang.conf
diff --git a/egs/babel/s5c/run-2b-bnf.sh b/egs/babel/s5c/run-2b-bnf.sh
index 505e647cf93..a30a73ebdb8 100755
--- a/egs/babel/s5c/run-2b-bnf.sh
+++ b/egs/babel/s5c/run-2b-bnf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# Apache 2.0
diff --git a/egs/babel/s5c/run-3b-bnf-nnet.sh b/egs/babel/s5c/run-3b-bnf-nnet.sh
index 169eec6f62f..fff73266305 100755
--- a/egs/babel/s5c/run-3b-bnf-nnet.sh
+++ b/egs/babel/s5c/run-3b-bnf-nnet.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# 2014 Johns Hopkins (Yenda Trmal)
diff --git a/egs/babel/s5c/run-3b-bnf-sgmm.sh b/egs/babel/s5c/run-3b-bnf-sgmm.sh
index 81e4ae538b4..96fcd396893 100755
--- a/egs/babel/s5c/run-3b-bnf-sgmm.sh
+++ b/egs/babel/s5c/run-3b-bnf-sgmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Pegah Ghahremani
# 2014 Johns Hopkins (Yenda Trmal)
diff --git a/egs/babel/s5c/run-4-anydecode.sh b/egs/babel/s5c/run-4-anydecode.sh
index 56b7836683f..50300a186fb 100755
--- a/egs/babel/s5c/run-4-anydecode.sh
+++ b/egs/babel/s5c/run-4-anydecode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel/s5c/run-6-combine.sh b/egs/babel/s5c/run-6-combine.sh
index 81dc42caca3..d868c4dcefe 100755
--- a/egs/babel/s5c/run-6-combine.sh
+++ b/egs/babel/s5c/run-6-combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. conf/common_vars.sh
diff --git a/egs/babel/s5d/local/add_to_multilang.sh b/egs/babel/s5d/local/add_to_multilang.sh
index 162d8ffa709..dbf159a5eda 100755
--- a/egs/babel/s5d/local/add_to_multilang.sh
+++ b/egs/babel/s5d/local/add_to_multilang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/ali_to_rttm.sh b/egs/babel/s5d/local/ali_to_rttm.sh
index cb4f0740130..217758dfea4 100755
--- a/egs/babel/s5d/local/ali_to_rttm.sh
+++ b/egs/babel/s5d/local/ali_to_rttm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5d/local/apply_g2p.sh b/egs/babel/s5d/local/apply_g2p.sh
index 385b1f3536e..8807a93b762 100755
--- a/egs/babel/s5d/local/apply_g2p.sh
+++ b/egs/babel/s5d/local/apply_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/local/arpa2G.sh b/egs/babel/s5d/local/arpa2G.sh
index 887b393b459..f3806d010a7 100755
--- a/egs/babel/s5d/local/arpa2G.sh
+++ b/egs/babel/s5d/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5d/local/best_scores.sh b/egs/babel/s5d/local/best_scores.sh
index 33bcdb07183..5e6cd2dd512 100755
--- a/egs/babel/s5d/local/best_scores.sh
+++ b/egs/babel/s5d/local/best_scores.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/best_scores_kws.sh b/egs/babel/s5d/local/best_scores_kws.sh
index 164ec8dc4b0..a50509ae1e5 100755
--- a/egs/babel/s5d/local/best_scores_kws.sh
+++ b/egs/babel/s5d/local/best_scores_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/chain/run_ivector_common.sh b/egs/babel/s5d/local/chain/run_ivector_common.sh
index a1a145564d0..1cf19bd5351 100755
--- a/egs/babel/s5d/local/chain/run_ivector_common.sh
+++ b/egs/babel/s5d/local/chain/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -eu -o pipefail
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn.sh
index 7b4535f8c5e..f753e9f3bf2 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh
index 5fc14dda826..c64f4b78edd 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh
index 8c7de5d18d4..91938b9ce99 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh
index 0b3e70b5a04..031af6b468b 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh
index 45f2907645e..2149c9eb263 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh
index 0d92aff5c28..9ed6631ec09 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh
index 4129c00dcb4..6e1f496944d 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh
index 1cfa50c1aa1..e352e6aa5cd 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh
index ba8ac1e0373..eda24b245b3 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh
index 5de285e080e..c65a8eb58ca 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# by default, with cleanup
diff --git a/egs/babel/s5d/local/chain2/run_tdnn.sh b/egs/babel/s5d/local/chain2/run_tdnn.sh
new file mode 100755
index 00000000000..3abcee589cd
--- /dev/null
+++ b/egs/babel/s5d/local/chain2/run_tdnn.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+# Copyright 2020 Idiap Research Institute (Srikanth Madikeri)
+# chain2 recipe for monolingual systems for BABEL
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=-1
+nj=30
+train_set=train
+gmm=tri5 # the gmm for the target data
+langdir=data/lang
+num_threads_ubm=1
+nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir= # you can set this to use previously dumped egs.
+chunk_width=150,120,90,75
+frame_subsampling_factor=3
+langs=default # has multiple values for a multilingual system
+srand=-1
+num_jobs_initial=2
+num_jobs_final=12
+initial_effective_lrate=0.001
+final_effective_lrate=0.0001
+max_param_change=2.0
+xent_regularize=0.1
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat <data/lang_chain/topo
+ fi
+fi
+
+if [ $stage -le 8 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \
+ $langdir $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 9 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor $frame_subsampling_factor \
+ --context-opts "--context-width=2 --central-position=1" \
+ --leftmost-questions-truncate -1 \
+ --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+if [ $stage -le 10 ]; then
+ mkdir -p $dir
+
+ echo "$0: creating neural net configs using the xconfig parser";
+
+ num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+ [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; }
+ learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
+
+ mkdir -p $dir/configs
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=43 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-layer name=tdnn1 dim=450
+ relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=450
+ relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn7 input=Append(-6,-3,0) dim=450
+
+ ## adding the layers for chain branch
+ relu-batchnorm-layer name=prefinal-chain input=tdnn7 dim=450 target-rms=0.5
+ output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5
+ output-layer name=output-default input=prefinal-chain include-log-softmax=false dim=$num_targets max-change=1.5
+
+ ## adding the layers for chain branch
+
+ # adding the layers for xent branch
+ # This block prints the configs for a separate output that will be
+ # trained with a cross-entropy objective in the 'chain' models... this
+ # has the effect of regularizing the hidden parts of the model. we use
+ # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+ # 0.5 / args.xent_regularize is suitable as it means the xent
+ # final-layer learns at a rate independent of the regularization
+ # constant; and the 0.5 was tuned so as to make the relative progress
+ # similar in the xent and regular final layers.
+ relu-batchnorm-layer name=prefinal-xent input=tdnn7 dim=450 target-rms=0.5
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+ output-layer name=output-default-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+ if [ ! -f $dir/init/default_trans.mdl ]; then # checking this because it may have been copied in a previous run of the same script
+ copy-transition-model $tree_dir/final.mdl $dir/init/default_trans.mdl || exit 1 &
+ else
+ echo "Keeping the old $dir/init/default_trans.mdl as it already exists."
+ fi
+
+fi
+
+init_info=$dir/init/info.txt
+if [ $stage -le 11 ]; then
+
+ if [ ! -f $dir/configs/ref.raw ]; then
+ echo "Expected $dir/configs/ref.raw to exist"
+ exit
+ fi
+
+ mkdir -p $dir/init
+ nnet3-info $dir/configs/ref.raw > $dir/configs/temp.info
+ model_left_context=`fgrep 'left-context' $dir/configs/temp.info | awk '{print $2}'`
+ model_right_context=`fgrep 'right-context' $dir/configs/temp.info | awk '{print $2}'`
+ cat >$init_info < )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
index a4f301e2c14..4168604fce7 100755
--- a/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
+++ b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/nist_eval/export_systems.sh b/egs/babel/s5d/local/nist_eval/export_systems.sh
index d0af608416c..f0984c703ab 100755
--- a/egs/babel/s5d/local/nist_eval/export_systems.sh
+++ b/egs/babel/s5d/local/nist_eval/export_systems.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel/s5d/local/nist_eval/make_release.sh b/egs/babel/s5d/local/nist_eval/make_release.sh
index 179d5cbe619..0e4e1f2cb15 100755
--- a/egs/babel/s5d/local/nist_eval/make_release.sh
+++ b/egs/babel/s5d/local/nist_eval/make_release.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
team=RADICAL
corpusid=
diff --git a/egs/babel/s5d/local/nist_eval/split_compound_set.sh b/egs/babel/s5d/local/nist_eval/split_compound_set.sh
index 59ea4c162d7..3c236430b73 100755
--- a/egs/babel/s5d/local/nist_eval/split_compound_set.sh
+++ b/egs/babel/s5d/local/nist_eval/split_compound_set.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
index 3b12222e13a..968b8b36c1e 100755
--- a/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
+++ b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Copyright 2014 Vimal Manohar
diff --git a/egs/babel/s5d/local/prepare_extended_lexicon.sh b/egs/babel/s5d/local/prepare_extended_lexicon.sh
index 3cc5ca6c21f..7e8ca02529e 100755
--- a/egs/babel/s5d/local/prepare_extended_lexicon.sh
+++ b/egs/babel/s5d/local/prepare_extended_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/reestimate_langp.sh b/egs/babel/s5d/local/reestimate_langp.sh
index ae70b6a8f46..b31438fca57 100755
--- a/egs/babel/s5d/local/reestimate_langp.sh
+++ b/egs/babel/s5d/local/reestimate_langp.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/resegment/generate_segments.sh b/egs/babel/s5d/local/resegment/generate_segments.sh
index 95e88deb87d..33eb5b36848 100755
--- a/egs/babel/s5d/local/resegment/generate_segments.sh
+++ b/egs/babel/s5d/local/resegment/generate_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/local/resegment/train_segmentation.sh b/egs/babel/s5d/local/resegment/train_segmentation.sh
index 511c451993e..264a1cc3c88 100755
--- a/egs/babel/s5d/local/resegment/train_segmentation.sh
+++ b/egs/babel/s5d/local/resegment/train_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/local/run_asr_segmentation.sh b/egs/babel/s5d/local/run_asr_segmentation.sh
index f70775526b6..025f645dcf3 100755
--- a/egs/babel/s5d/local/run_asr_segmentation.sh
+++ b/egs/babel/s5d/local/run_asr_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/babel/s5d/local/run_cleanup_segmentation.sh b/egs/babel/s5d/local/run_cleanup_segmentation.sh
index 324d796b1b1..5a320ece5cf 100755
--- a/egs/babel/s5d/local/run_cleanup_segmentation.sh
+++ b/egs/babel/s5d/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/babel/s5d/local/run_kws_stt_task.sh b/egs/babel/s5d/local/run_kws_stt_task.sh
index e2f719bde9f..447adf7ac3f 100755
--- a/egs/babel/s5d/local/run_kws_stt_task.sh
+++ b/egs/babel/s5d/local/run_kws_stt_task.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5d/local/run_kws_stt_task2.sh b/egs/babel/s5d/local/run_kws_stt_task2.sh
index 73c4e730ab5..35efefb775e 100755
--- a/egs/babel/s5d/local/run_kws_stt_task2.sh
+++ b/egs/babel/s5d/local/run_kws_stt_task2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5d/local/score_combine.sh b/egs/babel/s5d/local/score_combine.sh
index 7e8af85b2d8..eb9f8412089 100755
--- a/egs/babel/s5d/local/score_combine.sh
+++ b/egs/babel/s5d/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2013 Arnab Ghoshal
# Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
diff --git a/egs/babel/s5d/local/score_stm.sh b/egs/babel/s5d/local/score_stm.sh
index 56835109722..b86d83570e2 100755
--- a/egs/babel/s5d/local/score_stm.sh
+++ b/egs/babel/s5d/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5d/local/search/combine.sh b/egs/babel/s5d/local/search/combine.sh
index 4f77c0f0f7c..612c55a3a3d 100755
--- a/egs/babel/s5d/local/search/combine.sh
+++ b/egs/babel/s5d/local/search/combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/combine_special.sh b/egs/babel/s5d/local/search/combine_special.sh
index 5802f49be06..0c1fbfe1c43 100755
--- a/egs/babel/s5d/local/search/combine_special.sh
+++ b/egs/babel/s5d/local/search/combine_special.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/compile_keywords.sh b/egs/babel/s5d/local/search/compile_keywords.sh
index 92dc4220a8e..39cbb299a8e 100755
--- a/egs/babel/s5d/local/search/compile_keywords.sh
+++ b/egs/babel/s5d/local/search/compile_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/compile_proxy_keywords.sh b/egs/babel/s5d/local/search/compile_proxy_keywords.sh
index 33d8dd52938..873a61b7b23 100755
--- a/egs/babel/s5d/local/search/compile_proxy_keywords.sh
+++ b/egs/babel/s5d/local/search/compile_proxy_keywords.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# 2012-2014 Guoguo Chen
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/normalize.sh b/egs/babel/s5d/local/search/normalize.sh
index 38054f75879..4b56fb97a05 100755
--- a/egs/babel/s5d/local/search/normalize.sh
+++ b/egs/babel/s5d/local/search/normalize.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5d/local/search/rttm_to_hitlists.sh b/egs/babel/s5d/local/search/rttm_to_hitlists.sh
index c6a8d2b5ad0..27bbbdd6fe0 100755
--- a/egs/babel/s5d/local/search/rttm_to_hitlists.sh
+++ b/egs/babel/s5d/local/search/rttm_to_hitlists.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/run_phn_search.sh b/egs/babel/s5d/local/search/run_phn_search.sh
index 3d39f55efa7..e03fe0dfe84 100755
--- a/egs/babel/s5d/local/search/run_phn_search.sh
+++ b/egs/babel/s5d/local/search/run_phn_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/run_search.sh b/egs/babel/s5d/local/search/run_search.sh
index 1fbdb071123..b341062f3cf 100755
--- a/egs/babel/s5d/local/search/run_search.sh
+++ b/egs/babel/s5d/local/search/run_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/run_syll_search.sh b/egs/babel/s5d/local/search/run_syll_search.sh
index 0694414b6b6..cce8885c73d 100755
--- a/egs/babel/s5d/local/search/run_syll_search.sh
+++ b/egs/babel/s5d/local/search/run_syll_search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/score.sh b/egs/babel/s5d/local/search/score.sh
index e429b1da030..bf423648bd8 100755
--- a/egs/babel/s5d/local/search/score.sh
+++ b/egs/babel/s5d/local/search/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel/s5d/local/search/search.sh b/egs/babel/s5d/local/search/search.sh
index 854719b6d24..3da7c9edc46 100755
--- a/egs/babel/s5d/local/search/search.sh
+++ b/egs/babel/s5d/local/search/search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search/setup.sh b/egs/babel/s5d/local/search/setup.sh
index d4e2013a443..03d5cdc4f37 100755
--- a/egs/babel/s5d/local/search/setup.sh
+++ b/egs/babel/s5d/local/search/setup.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/search_index.sh b/egs/babel/s5d/local/search_index.sh
index 9e7cdb77f3d..6f0c6b46fb5 100755
--- a/egs/babel/s5d/local/search_index.sh
+++ b/egs/babel/s5d/local/search_index.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
# Apache 2.0
diff --git a/egs/babel/s5d/local/segmentation/tuning/train_lstm_asr_sad_1a.sh b/egs/babel/s5d/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
index 63f78aa8092..c14560dfed8 100755
--- a/egs/babel/s5d/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
+++ b/egs/babel/s5d/local/segmentation/tuning/train_lstm_asr_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a script to train a TDNN-LSTM for speech activity detection (SAD)
# using LSTM for long-context information.
diff --git a/egs/babel/s5d/local/segmentation/tuning/train_stats_asr_sad_1a.sh b/egs/babel/s5d/local/segmentation/tuning/train_stats_asr_sad_1a.sh
index 2dfe9a0bb96..d27a60eb4ac 100755
--- a/egs/babel/s5d/local/segmentation/tuning/train_stats_asr_sad_1a.sh
+++ b/egs/babel/s5d/local/segmentation/tuning/train_stats_asr_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a script to train a TDNN for speech activity detection (SAD)
# using statistics pooling for long-context information.
diff --git a/egs/babel/s5d/local/syllab/ali_to_syllabs.sh b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
index 8f0cb88771a..84b7d24eeb3 100755
--- a/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
+++ b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/create_syll_datadir.sh b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
index 4c014285619..6a95d3945a7 100755
--- a/egs/babel/s5d/local/syllab/create_syll_datadir.sh
+++ b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/generate_phone_lang.sh b/egs/babel/s5d/local/syllab/generate_phone_lang.sh
index 81d8a0acdc7..d9c91405e64 100755
--- a/egs/babel/s5d/local/syllab/generate_phone_lang.sh
+++ b/egs/babel/s5d/local/syllab/generate_phone_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/generate_syllable_lang.sh b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
index a7bd667027c..a375971d432 100755
--- a/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
+++ b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/lattice_word2syll.sh b/egs/babel/s5d/local/syllab/lattice_word2syll.sh
index 63e9114875d..6e20e78ff73 100755
--- a/egs/babel/s5d/local/syllab/lattice_word2syll.sh
+++ b/egs/babel/s5d/local/syllab/lattice_word2syll.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/run_phones.sh b/egs/babel/s5d/local/syllab/run_phones.sh
index 7c4a13c61f9..aea28cd4dd7 100755
--- a/egs/babel/s5d/local/syllab/run_phones.sh
+++ b/egs/babel/s5d/local/syllab/run_phones.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/syllab/run_syllabs.sh b/egs/babel/s5d/local/syllab/run_syllabs.sh
index 7366ac9ad35..f9697e86420 100755
--- a/egs/babel/s5d/local/syllab/run_syllabs.sh
+++ b/egs/babel/s5d/local/syllab/run_syllabs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/babel/s5d/local/train_g2p.sh b/egs/babel/s5d/local/train_g2p.sh
index 08be0014656..5a0594d8f46 100755
--- a/egs/babel/s5d/local/train_g2p.sh
+++ b/egs/babel/s5d/local/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/local/train_lms_srilm.sh b/egs/babel/s5d/local/train_lms_srilm.sh
index cf357260d8c..4283461aa47 100755
--- a/egs/babel/s5d/local/train_lms_srilm.sh
+++ b/egs/babel/s5d/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/babel/s5d/run-1-main-extend-lex.sh b/egs/babel/s5d/run-1-main-extend-lex.sh
index 69651416cfb..38e059fed72 100755
--- a/egs/babel/s5d/run-1-main-extend-lex.sh
+++ b/egs/babel/s5d/run-1-main-extend-lex.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Parameters for extended lexicon.
extend_lexicon=true
diff --git a/egs/babel/s5d/run-1-main-unicode-extend-lex.sh b/egs/babel/s5d/run-1-main-unicode-extend-lex.sh
index d7e831febeb..eac1f565468 100755
--- a/egs/babel/s5d/run-1-main-unicode-extend-lex.sh
+++ b/egs/babel/s5d/run-1-main-unicode-extend-lex.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Parameters for extended lexicon.
extend_lexicon=true
diff --git a/egs/babel/s5d/run-1-main-unicode.sh b/egs/babel/s5d/run-1-main-unicode.sh
index b6641433c84..0eefeaed3e5 100755
--- a/egs/babel/s5d/run-1-main-unicode.sh
+++ b/egs/babel/s5d/run-1-main-unicode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5d/run-1-main.sh b/egs/babel/s5d/run-1-main.sh
index 329e8480c54..a72596bd15e 100755
--- a/egs/babel/s5d/run-1-main.sh
+++ b/egs/babel/s5d/run-1-main.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
tri5_only=false
diff --git a/egs/babel/s5d/run-2-segmentation.sh b/egs/babel/s5d/run-2-segmentation.sh
index d832a9421c8..f7651c2ae91 100755
--- a/egs/babel/s5d/run-2-segmentation.sh
+++ b/egs/babel/s5d/run-2-segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
# Apache 2.0
diff --git a/egs/babel/s5d/run-4-anydecode.sh b/egs/babel/s5d/run-4-anydecode.sh
index 52c997ae26a..2593eb340a8 100755
--- a/egs/babel/s5d/run-4-anydecode.sh
+++ b/egs/babel/s5d/run-4-anydecode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
set -o pipefail
diff --git a/egs/babel_multilang/s5/local/chain2/run_tdnn.sh b/egs/babel_multilang/s5/local/chain2/run_tdnn.sh
new file mode 100755
index 00000000000..c0681455bf7
--- /dev/null
+++ b/egs/babel_multilang/s5/local/chain2/run_tdnn.sh
@@ -0,0 +1,447 @@
+#!/bin/bash
+# chain2 recipe for monolingual systems for BABEL
+# Copyright 2016 Pegah Ghahremani
+# Copyright 2020 Srikanth Madikeri (Idiap Research Institute)
+
+# This script is used to train multilingual LF-MMI system with a multi-task training
+# setup.
+
+# local.conf should exists (check README.txt), which contains configs for
+# multilingual training such as lang_list as array of space-separated languages used
+# for multilingual training.
+
+set -e -o pipefail
+
+remove_egs=false
+cmd=queue.pl
+srand=-1
+stage=0
+train_stage=-10
+get_egs_stage=-10
+decode_stage=-10
+
+speed_perturb=true
+use_pitch=true # if true, pitch feature used to train multilingual setup
+use_pitch_ivector=false # if true, pitch feature used in ivector extraction.
+use_ivector=true
+megs_dir=
+alidir=tri5_ali
+stage=-1
+nj=30
+train_set=train
+gmm=tri5 # the gmm for the target data
+langdir=data/lang
+num_threads_ubm=1
+nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
+feat_suffix=_hires
+
+label_delay=5
+frame_subsampling_factor=3
+xent_regularize=0.01
+max_param_change=2.0
+num_jobs_initial=2
+num_jobs_final=12
+initial_effective_lrate=0.001
+final_effective_lrate=0.0001
+num_jobs_initial=2
+num_jobs_final=8
+chunk_width=150
+extra_left_context=50
+extra_right_context=0
+common_egs_dir= # you can set this to use previously dumped egs.
+langconf=local.conf
+
+speed_perturb=true
+global_extractor=exp/multi/nnet3/extractor
+dir=exp/chain2${nnet3_affix}/tdnn${tdnn_affix}_multi
+
+. ./path.sh
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+[ ! -f $langconf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1;
+. $langconf || exit 1;
+
+[ ! -f local.conf ] && echo 'the file local.conf does not exist!' && exit 1;
+. local.conf || exit 1;
+
+suffix=
+if $speed_perturb; then
+ suffix=_sp
+fi
+
+num_langs=${#lang_list[@]}
+echo "$0 $@" # Print the command line for logging
+if ! cuda-compiled; then
+ cat <${multi_lfmmi_lang[$lang_index]}/topo
+ fi
+ done
+fi
+
+if [ $stage -le 9 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ for lang_index in `seq 0 $[$num_langs-1]`;do
+ langdir=${multi_lang[$lang_index]}
+ lores_train_data_dir=${multi_lores_data_dirs[$lang_index]}
+ gmm_dir=${multi_gmm_dir[$lang_index]}
+ lat_dir=${multi_ali_latdirs[$lang_index]}
+
+ steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \
+ $langdir $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+ exit
+ done
+fi
+
+if [ $stage -le 10 ]; then
+ for lang_index in `seq 0 $[$num_langs-1]`;do
+ lang_name=${lang_list[$lang_index]}
+ echo "$0: Building tree for $lang_name"
+
+ tree_dir=${multi_ali_treedirs[$lang_index]}
+ ali_dir=${multi_ali_dirs[$lang_index]}
+ lores_train_data_dir=${multi_lores_data_dirs[$lang_index]}
+ lang_dir=${multi_lfmmi_lang[$lang_index]}
+ if [ -f $tree_dir/final.mdl -a -f $tree_dir/tree ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ continue
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor $frame_subsampling_factor \
+ --context-opts "--context-width=2 --central-position=1" \
+ --leftmost-questions-truncate -1 \
+ --cmd "$train_cmd" 4000 ${lores_train_data_dir} $lang_dir $ali_dir $tree_dir
+ done
+fi
+
+if [ $stage -le 11 ]; then
+ echo "$0: creating multilingual neural net configs using the xconfig parser";
+ if [ -z $bnf_dim ]; then
+ bnf_dim=80
+ fi
+ mkdir -p $dir/configs
+ ivector_node_xconfig=""
+ ivector_to_append=""
+ if $use_ivector; then
+ ivector_node_xconfig="input dim=$ivector_dim name=ivector"
+ ivector_to_append=", ReplaceIndex(ivector, t, 0)"
+ fi
+ learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
+ dummy_tree_dir=${multi_ali_treedirs[0]}
+ num_targets=`tree-info $dummy_tree_dir/tree 2>/dev/null | grep num-pdfs | awk '{print $2}'` || exit 1;
+ cat < $dir/configs/network.xconfig
+ input dim=$feat_dim name=input
+ $ivector_node_xconfig
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-layer name=tdnn1 input=Append(input@-2,input@-1,input,input@1,input@2$ivector_to_append) dim=450
+ relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=450
+ relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=450
+ relu-batchnorm-layer name=tdnn7 input=Append(-6,-3,0) dim=450
+ #relu-batchnorm-layer name=tdnn_bn dim=$bnf_dim
+ # adding the layers for diffrent language's output
+ # dummy output node
+ output-layer name=output dim=$num_targets max-change=1.5 include-log-softmax=false
+ output-layer name=output-xent input=tdnn7 dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+EOF
+ # added separate outptut layer and softmax for all languages.
+ for lang_index in `seq 0 $[$num_langs-1]`;do
+ tree_dir=${multi_ali_treedirs[$lang_index]}
+ num_targets=`tree-info $tree_dir/tree 2>/dev/null | grep num-pdfs | awk '{print $2}'` || exit 1;
+
+ lang_name=${lang_list[${lang_index}]}
+ #echo "relu-renorm-layer name=prefinal-affine-lang-${lang_name} input=tdnn7 dim=450 target-rms=0.5"
+ echo "output-layer name=output-${lang_name} dim=$num_targets input=tdnn7 max-change=1.5 include-log-softmax=false"
+ echo "output-layer name=output-${lang_name}-xent input=tdnn7 dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5"
+ done >> $dir/configs/network.xconfig
+
+ lang_name=${lang_list[0]}
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+ --config-dir $dir/configs/
+fi
+
+init_info=$dir/init/info.txt
+if [ $stage -le 12 ]; then
+ if [ ! -f $dir/configs/ref.raw ]; then
+ echo "Expected $dir/configs/ref.raw to exist"
+ exit
+ fi
+ mkdir -p $dir/init
+ nnet3-info $dir/configs/ref.raw > $dir/configs/temp.info
+ model_left_context=`fgrep 'left-context' $dir/configs/temp.info | awk '{print $2}'`
+ model_right_context=`fgrep 'right-context' $dir/configs/temp.info | awk '{print $2}'`
+ cat >$init_info < $dir/${lang_name}/init/info.txt
+ done
+fi
diff --git a/egs/babel_multilang/s5/local/make_corpus_subset.sh b/egs/babel_multilang/s5/local/make_corpus_subset.sh
index add194d48e8..acd5e91a18b 100755
--- a/egs/babel_multilang/s5/local/make_corpus_subset.sh
+++ b/egs/babel_multilang/s5/local/make_corpus_subset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0.
diff --git a/egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh b/egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh
index 70d4ef22148..266f02f7ae1 100755
--- a/egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh
+++ b/egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/babel_multilang/s5/local/nnet3/prepare_multilingual_egs.sh b/egs/babel_multilang/s5/local/nnet3/prepare_multilingual_egs.sh
index acd88b1cee8..7d678c5f55c 100755
--- a/egs/babel_multilang/s5/local/nnet3/prepare_multilingual_egs.sh
+++ b/egs/babel_multilang/s5/local/nnet3/prepare_multilingual_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# This script generates separate egs directory for each input
# language in multilingual setup, which contains both egs.*.ark and egs.*.scp.
diff --git a/egs/babel_multilang/s5/local/nnet3/run_common_langs.sh b/egs/babel_multilang/s5/local/nnet3/run_common_langs.sh
index 63b7da82f60..073d01cb257 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_common_langs.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_common_langs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/babel_multilang/s5/local/nnet3/run_decode_lang.sh b/egs/babel_multilang/s5/local/nnet3/run_decode_lang.sh
index bd80fe9a701..5456d307d9f 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_decode_lang.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_decode_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh b/egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh
index 8fb01c19d00..2e48e057bd3 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_multilingual_bnf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script trains a multilingual model using 6 layer TDNN + Xent
# with 42 dim bottleneck layer in th fifth layer.
diff --git a/egs/babel_multilang/s5/local/nnet3/run_shared_ivector_extractor.sh b/egs/babel_multilang/s5/local/nnet3/run_shared_ivector_extractor.sh
index 7034743beca..28006a752c5 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_shared_ivector_extractor.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_shared_ivector_extractor.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/babel_multilang/s5/local/nnet3/run_tdnn_multilingual.sh b/egs/babel_multilang/s5/local/nnet3/run_tdnn_multilingual.sh
index 22ba636f06a..eb2cb77ba0d 100755
--- a/egs/babel_multilang/s5/local/nnet3/run_tdnn_multilingual.sh
+++ b/egs/babel_multilang/s5/local/nnet3/run_tdnn_multilingual.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Pegah Ghahremani
diff --git a/egs/bentham/v1/local/chain/compare_wer.sh b/egs/bentham/v1/local/chain/compare_wer.sh
index 2ce14e13694..a1b8fffe166 100755
--- a/egs/bentham/v1/local/chain/compare_wer.sh
+++ b/egs/bentham/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
index ec530ef1ce4..5343890db4e 100755
--- a/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ exp/chain/cnn_e2eali_1a
# System e2e_cnn_1a cnn_e2eali_1a
diff --git a/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index 716bdce3729..eda95d391c5 100755
--- a/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/bentham/v1/local/create_splits.sh b/egs/bentham/v1/local/create_splits.sh
index e8ea2279a49..a510959d472 100755
--- a/egs/bentham/v1/local/create_splits.sh
+++ b/egs/bentham/v1/local/create_splits.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Desh Raj (Johns Hopkins University)
# This script reads the extracted Bentham database files and creates
diff --git a/egs/bentham/v1/local/download_bentham_text.sh b/egs/bentham/v1/local/download_bentham_text.sh
index e09403718a1..5bcd1244b44 100755
--- a/egs/bentham/v1/local/download_bentham_text.sh
+++ b/egs/bentham/v1/local/download_bentham_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Desh Raj
# Apache 2.0
diff --git a/egs/bentham/v1/local/extract_features.sh b/egs/bentham/v1/local/extract_features.sh
index 460e467e99c..374309ba375 100755
--- a/egs/bentham/v1/local/extract_features.sh
+++ b/egs/bentham/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/bentham/v1/local/prepare_data.sh b/egs/bentham/v1/local/prepare_data.sh
index bbcc9863611..018f6843120 100755
--- a/egs/bentham/v1/local/prepare_data.sh
+++ b/egs/bentham/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Desh Raj (Johns Hopkins University)
diff --git a/egs/bentham/v1/local/score.sh b/egs/bentham/v1/local/score.sh
index 1d84815fc69..6168f38a929 100755
--- a/egs/bentham/v1/local/score.sh
+++ b/egs/bentham/v1/local/score.sh
@@ -1,5 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/bentham/v1/local/train_lm.sh b/egs/bentham/v1/local/train_lm.sh
index 48632a90769..b5434c38e0a 100755
--- a/egs/bentham/v1/local/train_lm.sh
+++ b/egs/bentham/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/bentham/v1/run_end2end.sh b/egs/bentham/v1/run_end2end.sh
index 63c034e41f6..1140fdb8e47 100755
--- a/egs/bentham/v1/run_end2end.sh
+++ b/egs/bentham/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Ashish Arora (Johns Hopkins University)
# 2018 Desh Raj (Johns Hopkins University)
diff --git a/egs/bn_music_speech/v1/local/make_bn.sh b/egs/bn_music_speech/v1/local/make_bn.sh
index 5e2a29f0cca..accecba7662 100755
--- a/egs/bn_music_speech/v1/local/make_bn.sh
+++ b/egs/bn_music_speech/v1/local/make_bn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
diff --git a/egs/bn_music_speech/v1/run.sh b/egs/bn_music_speech/v1/run.sh
index 08d5c022a9d..5cc82ddb320 100755
--- a/egs/bn_music_speech/v1/run.sh
+++ b/egs/bn_music_speech/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
diff --git a/egs/callhome_diarization/v1/diarization/VB_resegmentation.sh b/egs/callhome_diarization/v1/diarization/VB_resegmentation.sh
index 765c4eee8b8..ed3b2da0b3a 100755
--- a/egs/callhome_diarization/v1/diarization/VB_resegmentation.sh
+++ b/egs/callhome_diarization/v1/diarization/VB_resegmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Zili Huang
diff --git a/egs/callhome_diarization/v1/diarization/cluster.sh b/egs/callhome_diarization/v1/diarization/cluster.sh
index 5e5c6e9dbe5..6105129f907 100755
--- a/egs/callhome_diarization/v1/diarization/cluster.sh
+++ b/egs/callhome_diarization/v1/diarization/cluster.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 David Snyder
# 2017-2018 Matthew Maciejewski
diff --git a/egs/callhome_diarization/v1/diarization/extract_ivectors.sh b/egs/callhome_diarization/v1/diarization/extract_ivectors.sh
index d7bb389bad5..7fb2c6c510b 100755
--- a/egs/callhome_diarization/v1/diarization/extract_ivectors.sh
+++ b/egs/callhome_diarization/v1/diarization/extract_ivectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2016 David Snyder
diff --git a/egs/callhome_diarization/v1/diarization/make_rttm.py b/egs/callhome_diarization/v1/diarization/make_rttm.py
index cc1145ab9ab..fc32eafd530 100755
--- a/egs/callhome_diarization/v1/diarization/make_rttm.py
+++ b/egs/callhome_diarization/v1/diarization/make_rttm.py
@@ -34,9 +34,7 @@
import argparse
import sys
-
-sys.path.append('steps/libs')
-import common as common_lib
+import codecs
def get_args():
@@ -63,14 +61,14 @@ def main():
# File containing speaker labels per segment
seg2label = {}
- with common_lib.smart_open(args.labels) as labels_file:
+ with codecs.open(args.labels, 'r', 'utf-8') as labels_file:
for line in labels_file:
seg, label = line.strip().split()
seg2label[seg] = label
# Segments file
reco2segs = {}
- with common_lib.smart_open(args.segments) as segments_file:
+ with codecs.open(args.segments, 'r', 'utf-8') as segments_file:
for line in segments_file:
seg, reco, start, end = line.strip().split()
try:
@@ -117,7 +115,7 @@ def main():
new_segs += " " + start + "," + end + "," + label
merged_segs.append(reco + new_segs)
- with common_lib.smart_open(args.rttm_file, 'w') as rttm_writer:
+ with codecs.open(args.rttm_file, 'w', 'utf-8') as rttm_writer:
for reco_line in merged_segs:
segs = reco_line.strip().split()
reco = segs[0]
diff --git a/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh b/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh
index 8d579138c73..9091c52cd1a 100755
--- a/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh
+++ b/egs/callhome_diarization/v1/diarization/nnet3/xvector/extract_xvectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Daniel Povey
# 2017-2018 David Snyder
diff --git a/egs/callhome_diarization/v1/diarization/nnet3/xvector/score_plda.sh b/egs/callhome_diarization/v1/diarization/nnet3/xvector/score_plda.sh
index 703bafd8912..5616b032be0 100755
--- a/egs/callhome_diarization/v1/diarization/nnet3/xvector/score_plda.sh
+++ b/egs/callhome_diarization/v1/diarization/nnet3/xvector/score_plda.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-2018 David Snyder
# 2017-2018 Matthew Maciejewski
# Apache 2.0.
diff --git a/egs/callhome_diarization/v1/diarization/score_plda.sh b/egs/callhome_diarization/v1/diarization/score_plda.sh
index a5be35e8f39..4ea0904aef2 100755
--- a/egs/callhome_diarization/v1/diarization/score_plda.sh
+++ b/egs/callhome_diarization/v1/diarization/score_plda.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-2018 David Snyder
# 2017-2018 Matthew Maciejewski
# Apache 2.0.
diff --git a/egs/callhome_diarization/v1/diarization/train_ivector_extractor_diag.sh b/egs/callhome_diarization/v1/diarization/train_ivector_extractor_diag.sh
index 9254012f3b0..39b571ddd41 100755
--- a/egs/callhome_diarization/v1/diarization/train_ivector_extractor_diag.sh
+++ b/egs/callhome_diarization/v1/diarization/train_ivector_extractor_diag.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/callhome_diarization/v1/diarization/vad_to_segments.sh b/egs/callhome_diarization/v1/diarization/vad_to_segments.sh
index d653e0313ea..7f0623c8cdb 100755
--- a/egs/callhome_diarization/v1/diarization/vad_to_segments.sh
+++ b/egs/callhome_diarization/v1/diarization/vad_to_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Matthew Maciejewski
# Apache 2.0
diff --git a/egs/callhome_diarization/v1/local/make_callhome.sh b/egs/callhome_diarization/v1/local/make_callhome.sh
index 21411fb6194..c2a014d7025 100755
--- a/egs/callhome_diarization/v1/local/make_callhome.sh
+++ b/egs/callhome_diarization/v1/local/make_callhome.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 David Snyder
# Apache 2.0.
#
diff --git a/egs/callhome_diarization/v1/local/make_sre.sh b/egs/callhome_diarization/v1/local/make_sre.sh
index bef4e06e68e..9dc68069a1f 100755
--- a/egs/callhome_diarization/v1/local/make_sre.sh
+++ b/egs/callhome_diarization/v1/local/make_sre.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
diff --git a/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats.sh b/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats.sh
index 62879623df4..b05dbd552f9 100755
--- a/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats.sh
+++ b/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
index dcdbe1b1593..326b6dbb9fa 100755
--- a/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
+++ b/egs/callhome_diarization/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/callhome_diarization/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/callhome_diarization/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
index 4fdf0cfbad6..b08764259f9 100755
--- a/egs/callhome_diarization/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
+++ b/egs/callhome_diarization/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 David Snyder
# 2018 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2018 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/callhome_diarization/v1/run.sh b/egs/callhome_diarization/v1/run.sh
index f4652c0c0ef..5fa663f8e84 100755
--- a/egs/callhome_diarization/v1/run.sh
+++ b/egs/callhome_diarization/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 David Snyder
# 2017-2018 Matthew Maciejewski
# Apache 2.0.
diff --git a/egs/callhome_diarization/v2/run.sh b/egs/callhome_diarization/v2/run.sh
index 85a2c7fdf2b..331b2c56613 100755
--- a/egs/callhome_diarization/v2/run.sh
+++ b/egs/callhome_diarization/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 David Snyder
# 2017-2018 Matthew Maciejewski
#
diff --git a/egs/callhome_egyptian/s5/local/callhome_create_test_lang.sh b/egs/callhome_egyptian/s5/local/callhome_create_test_lang.sh
index f4a5cf6d1e2..ab3f5ec0ad8 100755
--- a/egs/callhome_egyptian/s5/local/callhome_create_test_lang.sh
+++ b/egs/callhome_egyptian/s5/local/callhome_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/callhome_egyptian/s5/local/callhome_data_prep.sh b/egs/callhome_egyptian/s5/local/callhome_data_prep.sh
index 8afe6049b8b..08b6866e58f 100755
--- a/egs/callhome_egyptian/s5/local/callhome_data_prep.sh
+++ b/egs/callhome_egyptian/s5/local/callhome_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Johns Hopkins University : (Gaurav Kumar)
# The input is the Callhome Egyptian Arabic Dataset which contains *.sph files
diff --git a/egs/callhome_egyptian/s5/local/callhome_train_lms.sh b/egs/callhome_egyptian/s5/local/callhome_train_lms.sh
index ec92b43e2f8..e960e2c420d 100755
--- a/egs/callhome_egyptian/s5/local/callhome_train_lms.sh
+++ b/egs/callhome_egyptian/s5/local/callhome_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one level above this directory
# Generate the text for the LM training
diff --git a/egs/callhome_egyptian/s5/local/nnet3/run_ivector_common.sh b/egs/callhome_egyptian/s5/local/nnet3/run_ivector_common.sh
index f062af8e89d..5a810c72379 100755
--- a/egs/callhome_egyptian/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/callhome_egyptian/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Inherited from the WSJ nnet3 recipe, modified for use with ECA
diff --git a/egs/callhome_egyptian/s5/local/nnet3/run_tdnn.sh b/egs/callhome_egyptian/s5/local/nnet3/run_tdnn.sh
index bd3868da42d..7c8f0d8e6d9 100755
--- a/egs/callhome_egyptian/s5/local/nnet3/run_tdnn.sh
+++ b/egs/callhome_egyptian/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the standard "tdnn" system, built in nnet3; it's what we use to
# call multi-splice.
diff --git a/egs/callhome_egyptian/s5/local/score.sh b/egs/callhome_egyptian/s5/local/score.sh
index 1e493d44a98..be4322eb2db 100755
--- a/egs/callhome_egyptian/s5/local/score.sh
+++ b/egs/callhome_egyptian/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/callhome_egyptian/s5/run.sh b/egs/callhome_egyptian/s5/run.sh
index ebe550c9814..d0069c4df2c 100755
--- a/egs/callhome_egyptian/s5/run.sh
+++ b/egs/callhome_egyptian/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Johns Hopkins University (Author : Gaurav Kumar, Daniel Povey)
# Recipe for CallHome Egyptian Arabic
diff --git a/egs/casia_hwdb/v1/local/augment_data.sh b/egs/casia_hwdb/v1/local/augment_data.sh
index 1f13ed15ded..d498c54665c 100755
--- a/egs/casia_hwdb/v1/local/augment_data.sh
+++ b/egs/casia_hwdb/v1/local/augment_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/casia_hwdb/v1/local/chain/compare_wer.sh b/egs/casia_hwdb/v1/local/chain/compare_wer.sh
index ab880c1adb5..eeb831e8e6b 100755
--- a/egs/casia_hwdb/v1/local/chain/compare_wer.sh
+++ b/egs/casia_hwdb/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/casia_hwdb/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/casia_hwdb/v1/local/chain/run_cnn_e2eali_1b.sh
index 300c8ae8e31..0ed75baa41b 100755
--- a/egs/casia_hwdb/v1/local/chain/run_cnn_e2eali_1b.sh
+++ b/egs/casia_hwdb/v1/local/chain/run_cnn_e2eali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the
# lattice alignments and to build a tree
diff --git a/egs/casia_hwdb/v1/local/chain/run_flatstart_cnn1a.sh b/egs/casia_hwdb/v1/local/chain/run_flatstart_cnn1a.sh
index 023fbff1c14..55be6acbadc 100755
--- a/egs/casia_hwdb/v1/local/chain/run_flatstart_cnn1a.sh
+++ b/egs/casia_hwdb/v1/local/chain/run_flatstart_cnn1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/casia_hwdb/v1/local/extract_database.sh b/egs/casia_hwdb/v1/local/extract_database.sh
index 1af3713d586..62c8151f9c5 100755
--- a/egs/casia_hwdb/v1/local/extract_database.sh
+++ b/egs/casia_hwdb/v1/local/extract_database.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Chun-Chieh Chang
# The original format of the dataset given is GEDI and page images.
diff --git a/egs/casia_hwdb/v1/local/extract_features.sh b/egs/casia_hwdb/v1/local/extract_features.sh
index f75837ae5b3..c9a36991e94 100755
--- a/egs/casia_hwdb/v1/local/extract_features.sh
+++ b/egs/casia_hwdb/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/casia_hwdb/v1/local/score.sh b/egs/casia_hwdb/v1/local/score.sh
index f2405205f02..6e98902f5bd 100755
--- a/egs/casia_hwdb/v1/local/score.sh
+++ b/egs/casia_hwdb/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh --max-lmwt 10 "$@"
diff --git a/egs/casia_hwdb/v1/local/train_lm.sh b/egs/casia_hwdb/v1/local/train_lm.sh
index bc738f217da..9e651d63aff 100755
--- a/egs/casia_hwdb/v1/local/train_lm.sh
+++ b/egs/casia_hwdb/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/casia_hwdb/v1/local/train_lm_lr.sh b/egs/casia_hwdb/v1/local/train_lm_lr.sh
index a8b1bfb76a4..70efc7fd8dd 100755
--- a/egs/casia_hwdb/v1/local/train_lm_lr.sh
+++ b/egs/casia_hwdb/v1/local/train_lm_lr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/casia_hwdb/v1/run.sh b/egs/casia_hwdb/v1/run.sh
index 44d1f26117c..987ca5d5078 100755
--- a/egs/casia_hwdb/v1/run.sh
+++ b/egs/casia_hwdb/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
stage=0
diff --git a/egs/chime1/s5/local/chime1_prepare_data.sh b/egs/chime1/s5/local/chime1_prepare_data.sh
index c5963b5d4ab..153890d7554 100755
--- a/egs/chime1/s5/local/chime1_prepare_data.sh
+++ b/egs/chime1/s5/local/chime1_prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
diff --git a/egs/chime1/s5/local/chime1_prepare_dict.sh b/egs/chime1/s5/local/chime1_prepare_dict.sh
index a5dc4cbd50d..4ccff9274c4 100755
--- a/egs/chime1/s5/local/chime1_prepare_dict.sh
+++ b/egs/chime1/s5/local/chime1_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
diff --git a/egs/chime1/s5/local/chime1_prepare_grammar.sh b/egs/chime1/s5/local/chime1_prepare_grammar.sh
index e06f736245f..689704aa24f 100755
--- a/egs/chime1/s5/local/chime1_prepare_grammar.sh
+++ b/egs/chime1/s5/local/chime1_prepare_grammar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
diff --git a/egs/chime1/s5/local/score.sh b/egs/chime1/s5/local/score.sh
index 778a4283461..0c2f14de7ad 100755
--- a/egs/chime1/s5/local/score.sh
+++ b/egs/chime1/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime1/s5/run.sh b/egs/chime1/s5/run.sh
index 617c1a99f5c..8087f326bc1 100755
--- a/egs/chime1/s5/run.sh
+++ b/egs/chime1/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
diff --git a/egs/chime2/s5/local/chime_format_data.sh b/egs/chime2/s5/local/chime_format_data.sh
index 5870174aff4..fd9276ae8bf 100755
--- a/egs/chime2/s5/local/chime_format_data.sh
+++ b/egs/chime2/s5/local/chime_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime2/s5/local/clean_wsj0_data_prep.sh b/egs/chime2/s5/local/clean_wsj0_data_prep.sh
index 7cc39e4a847..ae5750f3572 100755
--- a/egs/chime2/s5/local/clean_wsj0_data_prep.sh
+++ b/egs/chime2/s5/local/clean_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime2/s5/local/noisy_wsj0_data_prep.sh b/egs/chime2/s5/local/noisy_wsj0_data_prep.sh
index 8744f25d67e..c98804a3468 100755
--- a/egs/chime2/s5/local/noisy_wsj0_data_prep.sh
+++ b/egs/chime2/s5/local/noisy_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime2/s5/local/reverb_wsj0_data_prep.sh b/egs/chime2/s5/local/reverb_wsj0_data_prep.sh
index c6903f21c13..47d2e03b867 100755
--- a/egs/chime2/s5/local/reverb_wsj0_data_prep.sh
+++ b/egs/chime2/s5/local/reverb_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime2/s5/local/score.sh b/egs/chime2/s5/local/score.sh
index 93d8a11613c..6e2af231b07 100755
--- a/egs/chime2/s5/local/score.sh
+++ b/egs/chime2/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime2/s5/local/wsj_prepare_dict.sh b/egs/chime2/s5/local/wsj_prepare_dict.sh
index 1fa59e69875..ff71fd0c4a7 100755
--- a/egs/chime2/s5/local/wsj_prepare_dict.sh
+++ b/egs/chime2/s5/local/wsj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime2/s5/run.sh b/egs/chime2/s5/run.sh
index 138ce941ce7..894e961f8aa 100755
--- a/egs/chime2/s5/run.sh
+++ b/egs/chime2/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
diff --git a/egs/chime3/s5/local/bth_chime3_data_prep.sh b/egs/chime3/s5/local/bth_chime3_data_prep.sh
index 6fefc798487..8cc5cea86cf 100755
--- a/egs/chime3/s5/local/bth_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/bth_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/chime3_beamform.sh b/egs/chime3/s5/local/chime3_beamform.sh
index 170a37ccd84..39055196f41 100755
--- a/egs/chime3/s5/local/chime3_beamform.sh
+++ b/egs/chime3/s5/local/chime3_beamform.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime3/s5/local/chime3_calc_wers.sh b/egs/chime3/s5/local/chime3_calc_wers.sh
index 4770e5cf38c..58fba170c06 100755
--- a/egs/chime3/s5/local/chime3_calc_wers.sh
+++ b/egs/chime3/s5/local/chime3_calc_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime3/s5/local/chime3_calc_wers_smbr.sh b/egs/chime3/s5/local/chime3_calc_wers_smbr.sh
index ba5fc03d1da..66ac84690cc 100755
--- a/egs/chime3/s5/local/chime3_calc_wers_smbr.sh
+++ b/egs/chime3/s5/local/chime3_calc_wers_smbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime3/s5/local/chime3_train_lms.sh b/egs/chime3/s5/local/chime3_train_lms.sh
index 984ef766b2a..9ff1fad7e35 100755
--- a/egs/chime3/s5/local/chime3_train_lms.sh
+++ b/egs/chime3/s5/local/chime3_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Modified from the script for CHiME3 baseline
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)
diff --git a/egs/chime3/s5/local/chime3_train_rnnlms.sh b/egs/chime3/s5/local/chime3_train_rnnlms.sh
index 429ca828aa3..37a75d38b38 100755
--- a/egs/chime3/s5/local/chime3_train_rnnlms.sh
+++ b/egs/chime3/s5/local/chime3_train_rnnlms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)
diff --git a/egs/chime3/s5/local/clean_chime3_format_data.sh b/egs/chime3/s5/local/clean_chime3_format_data.sh
index f2d81bc5324..51201c7ec65 100755
--- a/egs/chime3/s5/local/clean_chime3_format_data.sh
+++ b/egs/chime3/s5/local/clean_chime3_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime3/s5/local/clean_wsj0_data_prep.sh b/egs/chime3/s5/local/clean_wsj0_data_prep.sh
index fe96881cf8d..671379ae732 100755
--- a/egs/chime3/s5/local/clean_wsj0_data_prep.sh
+++ b/egs/chime3/s5/local/clean_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/real_close_chime3_data_prep.sh b/egs/chime3/s5/local/real_close_chime3_data_prep.sh
index 4ef1fc4dffc..a420d094805 100755
--- a/egs/chime3/s5/local/real_close_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/real_close_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/real_enhan_chime3_data_prep.sh b/egs/chime3/s5/local/real_enhan_chime3_data_prep.sh
index 4230a1adbed..5c53f4c4f95 100755
--- a/egs/chime3/s5/local/real_enhan_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/real_enhan_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/real_noisy_chime3_data_prep.sh b/egs/chime3/s5/local/real_noisy_chime3_data_prep.sh
index 94a2d0226db..96428e143a9 100755
--- a/egs/chime3/s5/local/real_noisy_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/real_noisy_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/run_dnn.sh b/egs/chime3/s5/local/run_dnn.sh
index 78dc4283ee3..6962896e7b4 100755
--- a/egs/chime3/s5/local/run_dnn.sh
+++ b/egs/chime3/s5/local/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime3/s5/local/run_gmm.sh b/egs/chime3/s5/local/run_gmm.sh
index 5b9fbaa1736..f2afbc5be65 100755
--- a/egs/chime3/s5/local/run_gmm.sh
+++ b/egs/chime3/s5/local/run_gmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime3/s5/local/run_init.sh b/egs/chime3/s5/local/run_init.sh
index 9db289a12a5..2350599411f 100755
--- a/egs/chime3/s5/local/run_init.sh
+++ b/egs/chime3/s5/local/run_init.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime3/s5/local/run_lmrescore.sh b/egs/chime3/s5/local/run_lmrescore.sh
index 0c364367c98..20d22890f36 100755
--- a/egs/chime3/s5/local/run_lmrescore.sh
+++ b/egs/chime3/s5/local/run_lmrescore.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime3/s5/local/score.sh b/egs/chime3/s5/local/score.sh
index 93d8a11613c..6e2af231b07 100755
--- a/egs/chime3/s5/local/score.sh
+++ b/egs/chime3/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime3/s5/local/simu_enhan_chime3_data_prep.sh b/egs/chime3/s5/local/simu_enhan_chime3_data_prep.sh
index 8f47bcff095..827dfa5cba0 100755
--- a/egs/chime3/s5/local/simu_enhan_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/simu_enhan_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/simu_noisy_chime3_data_prep.sh b/egs/chime3/s5/local/simu_noisy_chime3_data_prep.sh
index 68c3ba2f0c7..16fe2ee6271 100755
--- a/egs/chime3/s5/local/simu_noisy_chime3_data_prep.sh
+++ b/egs/chime3/s5/local/simu_noisy_chime3_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/local/wsj_prepare_dict.sh b/egs/chime3/s5/local/wsj_prepare_dict.sh
index 6ddebd60293..7c5b4f98506 100755
--- a/egs/chime3/s5/local/wsj_prepare_dict.sh
+++ b/egs/chime3/s5/local/wsj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation
# 2012-2014 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime3/s5/run.sh b/egs/chime3/s5/run.sh
index 3ef5e630c13..d10e83c0cac 100755
--- a/egs/chime3/s5/run.sh
+++ b/egs/chime3/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Kaldi ASR baseline for the 3rd CHiME Challenge
#
diff --git a/egs/chime4/s5_1ch/local/chain/compare_wer.sh b/egs/chime4/s5_1ch/local/chain/compare_wer.sh
index edfefad547f..f59cac3bf5b 100755
--- a/egs/chime4/s5_1ch/local/chain/compare_wer.sh
+++ b/egs/chime4/s5_1ch/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh
index 3f8b7c60090..593ffe290ae 100755
--- a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This was modified from wsj/local/chain/tunning/run_tdnn_1e.sh to be
# used in Chime4.
diff --git a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh
index 8b4e93cd05b..8a7cb1813a2 100755
--- a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is a TDNN+LSTM chain system.
diff --git a/egs/chime4/s5_1ch/local/chime4_calc_wers.sh b/egs/chime4/s5_1ch/local/chime4_calc_wers.sh
index a4c115c1093..a0eacac54bf 100755
--- a/egs/chime4/s5_1ch/local/chime4_calc_wers.sh
+++ b/egs/chime4/s5_1ch/local/chime4_calc_wers.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh b/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh
index 84bb2cb8dbd..a78e7d4583b 100755
--- a/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh
+++ b/egs/chime4/s5_1ch/local/chime4_calc_wers_looped.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime4/s5_1ch/local/chime4_calc_wers_smbr.sh b/egs/chime4/s5_1ch/local/chime4_calc_wers_smbr.sh
index b316208b168..a56dc66dfd1 100755
--- a/egs/chime4/s5_1ch/local/chime4_calc_wers_smbr.sh
+++ b/egs/chime4/s5_1ch/local/chime4_calc_wers_smbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0.
diff --git a/egs/chime4/s5_1ch/local/chime4_train_lms.sh b/egs/chime4/s5_1ch/local/chime4_train_lms.sh
index 06dd716e789..f3002741bde 100755
--- a/egs/chime4/s5_1ch/local/chime4_train_lms.sh
+++ b/egs/chime4/s5_1ch/local/chime4_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Modified from the script for CHiME3 baseline
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)
diff --git a/egs/chime4/s5_1ch/local/chime4_train_rnnlms.sh b/egs/chime4/s5_1ch/local/chime4_train_rnnlms.sh
index 8324c8e06b1..fa539584067 100755
--- a/egs/chime4/s5_1ch/local/chime4_train_rnnlms.sh
+++ b/egs/chime4/s5_1ch/local/chime4_train_rnnlms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)
diff --git a/egs/chime4/s5_1ch/local/clean_chime4_format_data.sh b/egs/chime4/s5_1ch/local/clean_chime4_format_data.sh
index 23dc8a70d9e..42f3ee70209 100755
--- a/egs/chime4/s5_1ch/local/clean_chime4_format_data.sh
+++ b/egs/chime4/s5_1ch/local/clean_chime4_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# 2015 Guoguo Chen
diff --git a/egs/chime4/s5_1ch/local/clean_wsj0_data_prep.sh b/egs/chime4/s5_1ch/local/clean_wsj0_data_prep.sh
index 8c6989bc0b2..9f831546e5c 100755
--- a/egs/chime4/s5_1ch/local/clean_wsj0_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/clean_wsj0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/compute_pesq.sh b/egs/chime4/s5_1ch/local/compute_pesq.sh
index 1d290a4893f..7121a02dca8 100755
--- a/egs/chime4/s5_1ch/local/compute_pesq.sh
+++ b/egs/chime4/s5_1ch/local/compute_pesq.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/compute_stoi_estoi_sdr.sh b/egs/chime4/s5_1ch/local/compute_stoi_estoi_sdr.sh
index b7627560b67..bb2accc3a81 100755
--- a/egs/chime4/s5_1ch/local/compute_stoi_estoi_sdr.sh
+++ b/egs/chime4/s5_1ch/local/compute_stoi_estoi_sdr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/download_se_eval_tool.sh b/egs/chime4/s5_1ch/local/download_se_eval_tool.sh
index ddd86a03d8a..a926ba6fda9 100755
--- a/egs/chime4/s5_1ch/local/download_se_eval_tool.sh
+++ b/egs/chime4/s5_1ch/local/download_se_eval_tool.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/nnet3/compare_wer.sh b/egs/chime4/s5_1ch/local/nnet3/compare_wer.sh
index 7a2fbd8a123..605c870a264 100755
--- a/egs/chime4/s5_1ch/local/nnet3/compare_wer.sh
+++ b/egs/chime4/s5_1ch/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/nnet3/compare_wer.sh exp/nnet3/tdnn_{c,d}_sp
diff --git a/egs/chime4/s5_1ch/local/nnet3/run_ivector_common.sh b/egs/chime4/s5_1ch/local/nnet3/run_ivector_common.sh
index 1009958dc0f..e3584f5f06e 100755
--- a/egs/chime4/s5_1ch/local/nnet3/run_ivector_common.sh
+++ b/egs/chime4/s5_1ch/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/chime4/s5_1ch/local/real_enhan_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/real_enhan_chime4_data_prep.sh
index 7d4f9c892a8..d3892ac2197 100755
--- a/egs/chime4/s5_1ch/local/real_enhan_chime4_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/real_enhan_chime4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh
index 0173b022176..f319d30c314 100755
--- a/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/real_noisy_chime4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/rnnlm/run_lstm_back.sh b/egs/chime4/s5_1ch/local/rnnlm/run_lstm_back.sh
index 76e2b563e6b..be46d1934d4 100755
--- a/egs/chime4/s5_1ch/local/rnnlm/run_lstm_back.sh
+++ b/egs/chime4/s5_1ch/local/rnnlm/run_lstm_back.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
# 2015 Guoguo Chen
diff --git a/egs/chime4/s5_1ch/local/rnnlm/tuning/run_lstm_1a.sh b/egs/chime4/s5_1ch/local/rnnlm/tuning/run_lstm_1a.sh
index 8825364e6fa..dfec3e4915e 100755
--- a/egs/chime4/s5_1ch/local/rnnlm/tuning/run_lstm_1a.sh
+++ b/egs/chime4/s5_1ch/local/rnnlm/tuning/run_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
# 2015 Guoguo Chen
@@ -66,6 +66,7 @@ if [ $stage -le 0 ]; then
mkdir -p $text_dir
cp $srcdir/train.rnn $text_dir/chime4.txt.tmp
sed -e "s///g" $text_dir/chime4.txt.tmp > $text_dir/chime4.txt
+ rm $text_dir/chime4.txt.tmp
cp $srcdir/valid.rnn $text_dir/dev.txt
fi
diff --git a/egs/chime4/s5_1ch/local/run_beamform_2ch_track.sh b/egs/chime4/s5_1ch/local/run_beamform_2ch_track.sh
index c9ce5a72040..6282b1c8dd5 100755
--- a/egs/chime4/s5_1ch/local/run_beamform_2ch_track.sh
+++ b/egs/chime4/s5_1ch/local/run_beamform_2ch_track.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime4/s5_1ch/local/run_beamform_6ch_track.sh b/egs/chime4/s5_1ch/local/run_beamform_6ch_track.sh
index 6bdbc61adc2..2145cb3480b 100755
--- a/egs/chime4/s5_1ch/local/run_beamform_6ch_track.sh
+++ b/egs/chime4/s5_1ch/local/run_beamform_6ch_track.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime4/s5_1ch/local/run_blstm_gev.sh b/egs/chime4/s5_1ch/local/run_blstm_gev.sh
index 2ee92b70fbd..d581a9d1e09 100755
--- a/egs/chime4/s5_1ch/local/run_blstm_gev.sh
+++ b/egs/chime4/s5_1ch/local/run_blstm_gev.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/run_gmm.sh b/egs/chime4/s5_1ch/local/run_gmm.sh
index 5178433dfc2..d148761ce40 100755
--- a/egs/chime4/s5_1ch/local/run_gmm.sh
+++ b/egs/chime4/s5_1ch/local/run_gmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_init.sh b/egs/chime4/s5_1ch/local/run_init.sh
index f8c4782cc48..36ae519dfe4 100755
--- a/egs/chime4/s5_1ch/local/run_init.sh
+++ b/egs/chime4/s5_1ch/local/run_init.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_lmrescore.sh b/egs/chime4/s5_1ch/local/run_lmrescore.sh
index 58a19c6da25..0c5bef0b757 100755
--- a/egs/chime4/s5_1ch/local/run_lmrescore.sh
+++ b/egs/chime4/s5_1ch/local/run_lmrescore.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn.sh b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn.sh
index 58af793615e..5d2555fb0dd 100755
--- a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn.sh
+++ b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh
index 0bea4dd7102..1b7c28654d5 100755
--- a/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh
+++ b/egs/chime4/s5_1ch/local/run_lmrescore_tdnn_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
diff --git a/egs/chime4/s5_1ch/local/run_nn-gev.sh b/egs/chime4/s5_1ch/local/run_nn-gev.sh
index a17dd3d3f15..2f9222fefc2 100755
--- a/egs/chime4/s5_1ch/local/run_nn-gev.sh
+++ b/egs/chime4/s5_1ch/local/run_nn-gev.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/score.sh b/egs/chime4/s5_1ch/local/score.sh
index 93d8a11613c..6e2af231b07 100755
--- a/egs/chime4/s5_1ch/local/score.sh
+++ b/egs/chime4/s5_1ch/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/simu_enhan_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/simu_enhan_chime4_data_prep.sh
index d6419fa90b9..10873130853 100755
--- a/egs/chime4/s5_1ch/local/simu_enhan_chime4_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/simu_enhan_chime4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh b/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh
index 124cde82b8a..0fed4cde63b 100755
--- a/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh
+++ b/egs/chime4/s5_1ch/local/simu_noisy_chime4_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/local/write_se_results.sh b/egs/chime4/s5_1ch/local/write_se_results.sh
index 7ada63f8ccc..8a844467d5a 100755
--- a/egs/chime4/s5_1ch/local/write_se_results.sh
+++ b/egs/chime4/s5_1ch/local/write_se_results.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime4/s5_1ch/local/wsj_prepare_dict.sh b/egs/chime4/s5_1ch/local/wsj_prepare_dict.sh
index 6ddebd60293..7c5b4f98506 100755
--- a/egs/chime4/s5_1ch/local/wsj_prepare_dict.sh
+++ b/egs/chime4/s5_1ch/local/wsj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation
# 2012-2014 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime4/s5_1ch/run.sh b/egs/chime4/s5_1ch/run.sh
index 5b980dec827..4265ed92a9e 100755
--- a/egs/chime4/s5_1ch/run.sh
+++ b/egs/chime4/s5_1ch/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Kaldi ASR baseline for the CHiME-4 Challenge (1ch track: single channel track)
#
diff --git a/egs/chime4/s5_2ch/run.sh b/egs/chime4/s5_2ch/run.sh
index 7ae5048c6fa..702a5b2fc58 100755
--- a/egs/chime4/s5_2ch/run.sh
+++ b/egs/chime4/s5_2ch/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Kaldi ASR baseline for the CHiME-4 Challenge (2ch track: 2 channel track)
#
diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh
index f0f469e46c8..5e8df0a64ae 100755
--- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1b.sh
index 920f2543132..d0ee46e8288 100755
--- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This factorized TDNN (TDNN-F) script is ported from s5b recipe
# It uses resnet-style skip connections.
diff --git a/egs/chime5/s5/local/nnet3/compare_wer.sh b/egs/chime5/s5/local/nnet3/compare_wer.sh
index 095e85cc338..4888de1f159 100755
--- a/egs/chime5/s5/local/nnet3/compare_wer.sh
+++ b/egs/chime5/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime5/s5/local/nnet3/run_ivector_common.sh b/egs/chime5/s5/local/nnet3/run_ivector_common.sh
index 2b672063be7..2da57372a45 100755
--- a/egs/chime5/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/chime5/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/chime5/s5/local/prepare_data.sh b/egs/chime5/s5/local/prepare_data.sh
index 98087322c38..ac07d02270f 100755
--- a/egs/chime5/s5/local/prepare_data.sh
+++ b/egs/chime5/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe, Yenda Trmal)
# Apache 2.0
diff --git a/egs/chime5/s5/local/prepare_dict.sh b/egs/chime5/s5/local/prepare_dict.sh
index 09083d0e795..1ea75af8a11 100755
--- a/egs/chime5/s5/local/prepare_dict.sh
+++ b/egs/chime5/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2018, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/chime5/s5/local/run_beamformit.sh b/egs/chime5/s5/local/run_beamformit.sh
index aa3badd90d8..4ac45eb98db 100755
--- a/egs/chime5/s5/local/run_beamformit.sh
+++ b/egs/chime5/s5/local/run_beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime5/s5/local/run_recog.sh b/egs/chime5/s5/local/run_recog.sh
index 5c74c9ff242..4e13ae8c0fb 100755
--- a/egs/chime5/s5/local/run_recog.sh
+++ b/egs/chime5/s5/local/run_recog.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime5/s5/local/run_wpe.sh b/egs/chime5/s5/local/run_wpe.sh
index 8ecbbd6182a..0e6ba2676ba 100755
--- a/egs/chime5/s5/local/run_wpe.sh
+++ b/egs/chime5/s5/local/run_wpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime5/s5/local/score_for_submit.sh b/egs/chime5/s5/local/score_for_submit.sh
index 23121d68b93..c08fc022840 100755
--- a/egs/chime5/s5/local/score_for_submit.sh
+++ b/egs/chime5/s5/local/score_for_submit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
#
diff --git a/egs/chime5/s5/local/train_lms_srilm.sh b/egs/chime5/s5/local/train_lms_srilm.sh
index 5a1d56d24b3..3b19e58bb48 100755
--- a/egs/chime5/s5/local/train_lms_srilm.sh
+++ b/egs/chime5/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal, Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime5/s5/run.sh b/egs/chime5/s5/run.sh
index 024c0190b3e..08779bd1aa1 100755
--- a/egs/chime5/s5/run.sh
+++ b/egs/chime5/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh b/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
index 95e9d934bd3..f81bbd59258 100755
--- a/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
+++ b/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh
index daad37e2cd7..ca1de635168 100755
--- a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh
index e033715d884..d9c4b20e513 100755
--- a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This factorized TDNN (TDNN-F) script is adapted from SWBD recipe 7q.
# It uses resnet-style skip connections.
diff --git a/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
index e3d8e6ac4dc..2de79fdf593 100755
--- a/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime5/s5b/local/copy_lat_dir_parallel.sh b/egs/chime5/s5b/local/copy_lat_dir_parallel.sh
index 82839604c9e..3e2c1b445b8 100755
--- a/egs/chime5/s5b/local/copy_lat_dir_parallel.sh
+++ b/egs/chime5/s5b/local/copy_lat_dir_parallel.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
cmd=queue.pl
nj=40
diff --git a/egs/chime5/s5b/local/extract_vad_weights.sh b/egs/chime5/s5b/local/extract_vad_weights.sh
index 250b021bd8f..d5019f100b1 100755
--- a/egs/chime5/s5b/local/extract_vad_weights.sh
+++ b/egs/chime5/s5b/local/extract_vad_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti)
# 2019 Vimal Manohar
diff --git a/egs/chime5/s5b/local/nnet3/compare_wer.sh b/egs/chime5/s5b/local/nnet3/compare_wer.sh
index fa627acd27b..6e4965dd819 100644
--- a/egs/chime5/s5b/local/nnet3/compare_wer.sh
+++ b/egs/chime5/s5b/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime5/s5b/local/nnet3/decode.sh b/egs/chime5/s5b/local/nnet3/decode.sh
index 8fa54e0d4a6..9c108430b94 100755
--- a/egs/chime5/s5b/local/nnet3/decode.sh
+++ b/egs/chime5/s5b/local/nnet3/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti)
# 2019 Vimal Manohar
diff --git a/egs/chime5/s5b/local/nnet3/run_ivector_common.sh b/egs/chime5/s5b/local/nnet3/run_ivector_common.sh
index 3910e1812a3..ef7ce7c3534 100755
--- a/egs/chime5/s5b/local/nnet3/run_ivector_common.sh
+++ b/egs/chime5/s5b/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/chime5/s5b/local/prepare_data.sh b/egs/chime5/s5b/local/prepare_data.sh
index 98087322c38..ac07d02270f 100755
--- a/egs/chime5/s5b/local/prepare_data.sh
+++ b/egs/chime5/s5b/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe, Yenda Trmal)
# Apache 2.0
diff --git a/egs/chime5/s5b/local/prepare_dict.sh b/egs/chime5/s5b/local/prepare_dict.sh
index 09083d0e795..1ea75af8a11 100755
--- a/egs/chime5/s5b/local/prepare_dict.sh
+++ b/egs/chime5/s5b/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2018, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/chime5/s5b/local/reverberate_lat_dir.sh b/egs/chime5/s5b/local/reverberate_lat_dir.sh
index f601a37c0e1..4a56d910489 100755
--- a/egs/chime5/s5b/local/reverberate_lat_dir.sh
+++ b/egs/chime5/s5b/local/reverberate_lat_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Vimal Manohar
# Apache 2.0
diff --git a/egs/chime5/s5b/local/run_beamformit.sh b/egs/chime5/s5b/local/run_beamformit.sh
index aa3badd90d8..4ac45eb98db 100755
--- a/egs/chime5/s5b/local/run_beamformit.sh
+++ b/egs/chime5/s5b/local/run_beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime5/s5b/local/run_recog.sh b/egs/chime5/s5b/local/run_recog.sh
index 989a5f95d01..4da9b1bf2fb 100755
--- a/egs/chime5/s5b/local/run_recog.sh
+++ b/egs/chime5/s5b/local/run_recog.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime5/s5b/local/run_wpe.sh b/egs/chime5/s5b/local/run_wpe.sh
index ed512e69aae..4c6ff0c7e71 100755
--- a/egs/chime5/s5b/local/run_wpe.sh
+++ b/egs/chime5/s5b/local/run_wpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime5/s5b/local/score_for_submit.sh b/egs/chime5/s5b/local/score_for_submit.sh
index 23121d68b93..c08fc022840 100755
--- a/egs/chime5/s5b/local/score_for_submit.sh
+++ b/egs/chime5/s5b/local/score_for_submit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
#
diff --git a/egs/chime5/s5b/local/train_lms_srilm.sh b/egs/chime5/s5b/local/train_lms_srilm.sh
index 5a1d56d24b3..3b19e58bb48 100755
--- a/egs/chime5/s5b/local/train_lms_srilm.sh
+++ b/egs/chime5/s5b/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal, Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime5/s5b/run.sh b/egs/chime5/s5b/run.sh
index 37bc5c2c94e..0358fab5269 100755
--- a/egs/chime5/s5b/run.sh
+++ b/egs/chime5/s5b/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime6/s5_track1/local/add_location_to_uttid.sh b/egs/chime6/s5_track1/local/add_location_to_uttid.sh
index 91bd0c0dd37..edb88c3f295 100755
--- a/egs/chime6/s5_track1/local/add_location_to_uttid.sh
+++ b/egs/chime6/s5_track1/local/add_location_to_uttid.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Author: Ashish Arora
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/chain/compare_wer.sh b/egs/chime6/s5_track1/local/chain/compare_wer.sh
index cd6be14ed88..736a3177f17 100755
--- a/egs/chime6/s5_track1/local/chain/compare_wer.sh
+++ b/egs/chime6/s5_track1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh
index daad37e2cd7..ca1de635168 100755
--- a/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1b.sh b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1b.sh
index a9c797ffa33..031a3687262 100755
--- a/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/chime6/s5_track1/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This factorized TDNN (TDNN-F) script is adapted from SWBD recipe 7q.
# It uses resnet-style skip connections.
diff --git a/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh b/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh
index 82839604c9e..3e2c1b445b8 100755
--- a/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh
+++ b/egs/chime6/s5_track1/local/copy_lat_dir_parallel.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
cmd=queue.pl
nj=40
diff --git a/egs/chime6/s5_track1/local/decode.sh b/egs/chime6/s5_track1/local/decode.sh
index 7283a171000..cabf473535f 100755
--- a/egs/chime6/s5_track1/local/decode.sh
+++ b/egs/chime6/s5_track1/local/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime6/s5_track1/local/extract_vad_weights.sh b/egs/chime6/s5_track1/local/extract_vad_weights.sh
index 250b021bd8f..d5019f100b1 100755
--- a/egs/chime6/s5_track1/local/extract_vad_weights.sh
+++ b/egs/chime6/s5_track1/local/extract_vad_weights.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti)
# 2019 Vimal Manohar
diff --git a/egs/chime6/s5_track1/local/generate_chime6_data.sh b/egs/chime6/s5_track1/local/generate_chime6_data.sh
index 93106cf605a..9ecdbe4208f 100755
--- a/egs/chime6/s5_track1/local/generate_chime6_data.sh
+++ b/egs/chime6/s5_track1/local/generate_chime6_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019, Johns Hopkins University (Author: Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/install_pb_chime5.sh b/egs/chime6/s5_track1/local/install_pb_chime5.sh
index a151dc60f12..7863cbed437 100755
--- a/egs/chime6/s5_track1/local/install_pb_chime5.sh
+++ b/egs/chime6/s5_track1/local/install_pb_chime5.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Installs pb_chime5
# miniconda should be installed in $HOME/miniconda3/
diff --git a/egs/chime6/s5_track1/local/nnet3/compare_wer.sh b/egs/chime6/s5_track1/local/nnet3/compare_wer.sh
index 095e85cc338..4888de1f159 100755
--- a/egs/chime6/s5_track1/local/nnet3/compare_wer.sh
+++ b/egs/chime6/s5_track1/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/chime6/s5_track1/local/nnet3/decode.sh b/egs/chime6/s5_track1/local/nnet3/decode.sh
index 8fa54e0d4a6..9c108430b94 100755
--- a/egs/chime6/s5_track1/local/nnet3/decode.sh
+++ b/egs/chime6/s5_track1/local/nnet3/decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti)
# 2019 Vimal Manohar
diff --git a/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh b/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh
index cfa18cb7617..0afb983d2fc 100755
--- a/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh
+++ b/egs/chime6/s5_track1/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/chime6/s5_track1/local/prepare_data.sh b/egs/chime6/s5_track1/local/prepare_data.sh
index 3d1ffe859a5..b7cc56c4be4 100755
--- a/egs/chime6/s5_track1/local/prepare_data.sh
+++ b/egs/chime6/s5_track1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe, Yenda Trmal)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/prepare_dict.sh b/egs/chime6/s5_track1/local/prepare_dict.sh
index 09083d0e795..1ea75af8a11 100755
--- a/egs/chime6/s5_track1/local/prepare_dict.sh
+++ b/egs/chime6/s5_track1/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2018, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/chime6/s5_track1/local/reverberate_lat_dir.sh b/egs/chime6/s5_track1/local/reverberate_lat_dir.sh
index f601a37c0e1..4a56d910489 100755
--- a/egs/chime6/s5_track1/local/reverberate_lat_dir.sh
+++ b/egs/chime6/s5_track1/local/reverberate_lat_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Vimal Manohar
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/run_beamformit.sh b/egs/chime6/s5_track1/local/run_beamformit.sh
index aa3badd90d8..4ac45eb98db 100755
--- a/egs/chime6/s5_track1/local/run_beamformit.sh
+++ b/egs/chime6/s5_track1/local/run_beamformit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime6/s5_track1/local/run_gss.sh b/egs/chime6/s5_track1/local/run_gss.sh
index fbdc4af25d1..a9c01b20564 100755
--- a/egs/chime6/s5_track1/local/run_gss.sh
+++ b/egs/chime6/s5_track1/local/run_gss.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
diff --git a/egs/chime6/s5_track1/local/run_wpe.sh b/egs/chime6/s5_track1/local/run_wpe.sh
index ed512e69aae..4c6ff0c7e71 100755
--- a/egs/chime6/s5_track1/local/run_wpe.sh
+++ b/egs/chime6/s5_track1/local/run_wpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/score_for_submit.sh b/egs/chime6/s5_track1/local/score_for_submit.sh
index ba7d6cde574..1d7564c6ee0 100755
--- a/egs/chime6/s5_track1/local/score_for_submit.sh
+++ b/egs/chime6/s5_track1/local/score_for_submit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Copyright 2019 Johns Hopkins University (Author: Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/local/train_lms_srilm.sh b/egs/chime6/s5_track1/local/train_lms_srilm.sh
index 5a1d56d24b3..3b19e58bb48 100755
--- a/egs/chime6/s5_track1/local/train_lms_srilm.sh
+++ b/egs/chime6/s5_track1/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal, Shinji Watanabe)
# Apache 2.0
diff --git a/egs/chime6/s5_track1/run.sh b/egs/chime6/s5_track1/run.sh
index 0890a939faf..cbcb3cd2102 100755
--- a/egs/chime6/s5_track1/run.sh
+++ b/egs/chime6/s5_track1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Based mostly on the TED-LIUM and Switchboard recipe
#
diff --git a/egs/chime6/s5_track2/RESULTS b/egs/chime6/s5_track2/RESULTS
index cf87e7cc109..131b43cecf8 100644
--- a/egs/chime6/s5_track2/RESULTS
+++ b/egs/chime6/s5_track2/RESULTS
@@ -1,18 +1,23 @@
# Results for Chime-6 track 2 for dev and eval, using pretrained models
# available at http://kaldi-asr.org/models/m12.
-# Speech Activity Detection (SAD)
- Missed speech False alarm Total error
-Dev 4.3 2.1 6.4
-Eval 5.6 5.9 11.5
+# These results are reported only for array U06, which is the default
+# array selection method in the baseline system.
-# The results for the remaining pipeline are only for array U06.
+# Speech Activity Detection (SAD)
+ Missed speech False alarm Total error
+Dev (old RTTM) 2.5 0.8 3.3
+Dev (new RTTM) 1.9 0.7 2.6
+Eval (old RTTM) 4.1 1.8 5.9
+Eval (new RTTM) 4.3 1.5 5.8
# Diarization
- DER JER
-Dev 57.15 83.96
-Eval 54.12 80.33
+ DER JER
+Dev (old RTTM) 61.56 69.75
+Dev (new RTTM) 63.42 70.83
+Eval (old RTTM) 61.96 71.40
+Eval (new RTTM) 68.20 72.54
# ASR nnet3 tdnn+chain
-Dev: U06 %WER 81.18 [ 58881 / 47798, 1638 ins, 30528 del, 15632 sub ]
-Eval: U06 %WER 85.39 [ 55132 / 47076, 1107 ins, 27768 del, 18201 sub ]
+Dev: %WER 84.25 [ 49610 / 58881, 1937 ins, 34685 del, 12988 sub ]
+Eval: %WER 77.94 [ 42971 / 55132, 1086 ins, 30839 del, 11046 sub ]
diff --git a/egs/chime6/s5_track2/local/decode.sh b/egs/chime6/s5_track2/local/decode.sh
index 876cc0be126..8f094f5c4df 100755
--- a/egs/chime6/s5_track2/local/decode.sh
+++ b/egs/chime6/s5_track2/local/decode.sh
@@ -1,22 +1,29 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# This script decodes raw utterances through the entire pipeline:
# Feature extraction -> SAD -> Diarization -> ASR
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal)
-# 2019 Desh Raj, David Snyder, Ashish Arora
+# 2019 Desh Raj, David Snyder, Ashish Arora, Zhaoheng Ni
# Apache 2.0
# Begin configuration section.
nj=8
-decode_nj=10
stage=0
sad_stage=0
+score_sad=true
diarizer_stage=0
decode_diarize_stage=0
score_stage=0
+
enhancement=beamformit
+# option to use the new RTTM reference for SAD and diarization
+use_new_rttm_reference=false
+if $use_new_rttm_reference == "true"; then
+ git clone https://github.com/nateanl/chime6_rttm
+fi
+
# chime5 main directory path
# please change the path accordingly
chime5_corpus=/export/corpora4/CHiME5
@@ -93,6 +100,7 @@ if [ $stage -le 1 ]; then
"$PWD/${enhandir}/${dset}_${enhancement}_u0*" \
${json_dir}/${dset} data/${dset}_${enhancement}_dereverb
done
+
fi
if [ $stage -le 2 ]; then
@@ -100,7 +108,7 @@ if [ $stage -le 2 ]; then
# want to store MFCC features.
mfccdir=mfcc
for x in ${test_sets}; do
- steps/make_mfcc.sh --nj $decode_nj --cmd "$train_cmd" \
+ steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" \
--mfcc-config conf/mfcc_hires.conf \
data/$x exp/make_mfcc/$x $mfccdir
done
@@ -121,18 +129,44 @@ if [ $stage -le 3 ]; then
exit 0
fi
# Perform segmentation
- local/segmentation/detect_speech_activity.sh --nj $decode_nj --stage $sad_stage \
+ local/segmentation/detect_speech_activity.sh --nj $nj --stage $sad_stage \
$test_set $sad_nnet_dir mfcc $sad_work_dir \
data/${datadir} || exit 1
- mv data/${datadir}_seg data/${datadir}_${nnet_type}_seg
- mv data/${datadir}/{segments.bak,utt2spk.bak} data/${datadir}_${nnet_type}_seg
+ test_dir=data/${datadir}_${nnet_type}_seg
+ mv data/${datadir}_seg ${test_dir}/
+ cp data/${datadir}/{segments.bak,utt2spk.bak} ${test_dir}/
# Generate RTTM file from segmentation performed by SAD. This can
# be used to evaluate the performance of the SAD as an intermediate
# step.
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
- data/${datadir}_${nnet_type}_seg/utt2spk data/${datadir}_${nnet_type}_seg/segments \
- data/${datadir}_${nnet_type}_seg/rttm
+ ${test_dir}/utt2spk ${test_dir}/segments ${test_dir}/rttm
+
+ if [ $score_sad == "true" ]; then
+ echo "Scoring $datadir.."
+ # We first generate the reference RTTM from the backed up utt2spk and segments
+ # files.
+ ref_rttm=${test_dir}/ref_rttm
+ steps/segmentation/convert_utt2spk_and_segments_to_rttm.py ${test_dir}/utt2spk.bak \
+ ${test_dir}/segments.bak ${test_dir}/ref_rttm
+
+ # To score, we select just U06 segments from the hypothesis RTTM.
+ hyp_rttm=${test_dir}/rttm.U06
+ grep 'U06' ${test_dir}/rttm > ${test_dir}/rttm.U06
+ echo "Array U06 selected for scoring.."
+
+ if $use_new_rttm_reference == "true"; then
+ echo "Use the new RTTM reference."
+ mode="$(cut -d'_' -f1 <<<"$datadir")"
+ ref_rttm=./chime6_rttm/${mode}_rttm
+ fi
+
+ sed 's/_U0[1-6].ENH//g' $ref_rttm > $ref_rttm.scoring
+ sed 's/_U0[1-6].ENH//g' $hyp_rttm > $hyp_rttm.scoring
+ cat ./local/uem_file | grep 'U06' | sed 's/_U0[1-6]//g' > ./local/uem_file.tmp
+ md-eval.pl -1 -c 0.25 -u ./local/uem_file.tmp -r $ref_rttm.scoring -s $hyp_rttm.scoring |\
+ awk 'or(/MISSED SPEECH/,/FALARM SPEECH/)'
+ fi
done
fi
@@ -141,7 +175,14 @@ fi
#######################################################################
if [ $stage -le 4 ]; then
for datadir in ${test_sets}; do
- local/diarize.sh --nj 10 --cmd "$train_cmd" --stage $diarizer_stage \
+ if $use_new_rttm_reference == "true"; then
+ mode="$(cut -d'_' -f1 <<<"$datadir")"
+ ref_rttm=./chime6_rttm/${mode}_rttm
+ else
+ ref_rttm=data/${datadir}_${nnet_type}_seg/ref_rttm
+ fi
+ local/diarize.sh --nj $nj --cmd "$train_cmd" --stage $diarizer_stage \
+ --ref-rttm $ref_rttm \
exp/xvector_nnet_1a \
data/${datadir}_${nnet_type}_seg \
exp/${datadir}_${nnet_type}_seg_diarization
@@ -156,7 +197,7 @@ if [ $stage -le 5 ]; then
local/decode_diarized.sh --nj $nj --cmd "$decode_cmd" --stage $decode_diarize_stage \
exp/${datadir}_${nnet_type}_seg_diarization data/$datadir data/lang \
exp/chain_${train_set}_cleaned_rvb exp/nnet3_${train_set}_cleaned_rvb \
- data/${datadir}_diarized
+ data/${datadir}_diarized || exit 1
done
fi
diff --git a/egs/chime6/s5_track2/local/decode_diarized.sh b/egs/chime6/s5_track2/local/decode_diarized.sh
index 2d0ad6a3b95..f687b313893 100755
--- a/egs/chime6/s5_track2/local/decode_diarized.sh
+++ b/egs/chime6/s5_track2/local/decode_diarized.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Ashish Arora, Vimal Manohar
# Apache 2.0.
# This script takes an rttm file, and performs decoding on on a test directory.
@@ -38,6 +38,9 @@ if [ $stage -le 0 ]; then
echo "$0 copying data files in output directory"
cp $rttm_dir/rttm $rttm_dir/rttm_1
sed -i 's/'.ENH'/''/g' $rttm_dir/rttm_1
+ # removing participant introduction from the hypothesis rttm
+ # UEM file contains the scoring durations for each recording
+ local/truncate_rttm.py $rttm_dir/rttm_1 local/uem_file $rttm_dir/rttm_introduction_removed
mkdir -p ${out_dir}_hires
cp ${data_in}/{wav.scp,utt2spk} ${out_dir}_hires
utils/data/get_reco2dur.sh ${out_dir}_hires
@@ -45,8 +48,8 @@ fi
if [ $stage -le 1 ]; then
echo "$0 creating segments file from rttm and utt2spk, reco2file_and_channel "
- local/convert_rttm_to_utt2spk_and_segments.py --append-reco-id-to-spkr=true $rttm_dir/rttm_1 \
- <(awk '{print $2".ENH "$2" "$3}' $rttm_dir/rttm_1 |sort -u) \
+ local/convert_rttm_to_utt2spk_and_segments.py --append-reco-id-to-spkr=true $rttm_dir/rttm_introduction_removed \
+ <(awk '{print $2".ENH "$2" "$3}' $rttm_dir/rttm_introduction_removed |sort -u) \
${out_dir}_hires/utt2spk ${out_dir}_hires/segments
utils/utt2spk_to_spk2utt.pl ${out_dir}_hires/utt2spk > ${out_dir}_hires/spk2utt
diff --git a/egs/chime6/s5_track2/local/diarize.sh b/egs/chime6/s5_track2/local/diarize.sh
index 561d5fe7755..d555e92c0e8 100755
--- a/egs/chime6/s5_track2/local/diarize.sh
+++ b/egs/chime6/s5_track2/local/diarize.sh
@@ -1,5 +1,7 @@
#!/bin/bash
-# Copyright 2019 David Snder
+# Copyright 2019 David Snyder
+# 2020 Desh Raj
+
# Apache 2.0.
#
# This script takes an input directory that has a segments file (and
@@ -20,7 +22,7 @@ if [ $# != 3 ]; then
echo "Options: "
echo " --nj # number of parallel jobs."
echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
- echo " --ref-rttm # if present, used to score output RTTM."
+ echo " --ref_rttm ./local/dev_rttm # the location of the reference RTTM file"
exit 1;
fi
@@ -85,29 +87,33 @@ if [ $stage -le 4 ]; then
echo "$0: wrote RTTM to output directory ${out_dir}"
fi
+hyp_rttm=${out_dir}/rttm
+
# For scoring the diarization system, we use the same tool that was
# used in the DIHARD II challenge. This is available at:
# https://github.com/nryant/dscore
+# Note that the scoring takes a single reference RTTM and a single
+# hypothesis RTTM.
if [ $stage -le 5 ]; then
# If a reference RTTM file is not provided, we create one using the backed up
# segments and utt2spk files in the original data directory.
- if [ -z $ref_rttm ]; then
- ref_rttm=data/$name/rttm
- echo "$0: preparing ref RTTM file from segments and utt2spk"
+ if [ -z "$ref_rttm" ]; then
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py data/$name/utt2spk.bak \
- data/$name/segments.bak $ref_rttm
+ data/$name/segments.bak data/$name/rttm
+ ref_rttm=data/$name/rttm
fi
- grep 'U06' $ref_rttm > ${ref_rttm}.U06
- ref_rttm_path=$(readlink -f ${ref_rttm}.U06)
- out_rttm_path=$(readlink -f $out_dir/rttm)
+ echo "Diarization results for "${name}
if ! [ -d dscore ]; then
git clone https://github.com/nryant/dscore.git || exit 1;
cd dscore
python -m pip install --user -r requirements.txt
cd ..
fi
- cd dscore
- python score.py -r $ref_rttm_path -s $out_rttm_path
- cd ..
+ sed 's/_U0[1-6]\.ENH//g' $ref_rttm > $ref_rttm.scoring
+ sed 's/_U0[1-6]\.ENH//g' $hyp_rttm > $hyp_rttm.scoring
+ ref_rttm_path=$(readlink -f ${ref_rttm}.scoring)
+ hyp_rttm_path=$(readlink -f ${hyp_rttm}.scoring)
+ cat ./local/uem_file | grep 'U06' | sed 's/_U0[1-6]//g' > ./local/uem_file.scoring
+ cd dscore && python score.py -u ../local/uem_file.scoring -r $ref_rttm_path \
+ -s $hyp_rttm_path && cd .. || exit 1;
fi
-
diff --git a/egs/chime6/s5_track2/local/get_hyp_perspeaker_perarray_file.py b/egs/chime6/s5_track2/local/get_hyp_perspeaker_perarray_file.py
index 7b3e14aaa49..091cf7c05b1 100755
--- a/egs/chime6/s5_track2/local/get_hyp_perspeaker_perarray_file.py
+++ b/egs/chime6/s5_track2/local/get_hyp_perspeaker_perarray_file.py
@@ -39,11 +39,18 @@ def main():
combined_hyp_file = args.output_dir_path + '/' + 'hyp' + '_' + sessionid_micid_speakerid + '_comb'
combined_hyp_writer = open(combined_hyp_file, 'w')
utterances = sessionid_micid_speakerid_dict[sessionid_micid_speakerid]
- text = ''
+ # sorting utterances by start and end time
+ sessionid_micid_speakerid_utterances={}
for line in utterances:
parts = line.strip().split()
+ utt_parts = parts[0].strip().split('-')
+ time ='-'.join(utt_parts[2:])
+ sessionid_micid_speakerid_utterances[time] = line
+ text = ''
+ for time_key in sorted(sessionid_micid_speakerid_utterances):
+ parts = sessionid_micid_speakerid_utterances[time_key].strip().split()
text = text + ' ' + ' '.join(parts[1:])
- hyp_writer.write(line)
+ hyp_writer.write(sessionid_micid_speakerid_utterances[time_key])
combined_utterance = 'utt' + " " + text
combined_hyp_writer.write(combined_utterance)
combined_hyp_writer.write('\n')
diff --git a/egs/chime6/s5_track2/local/get_ref_perspeaker_persession_file.py b/egs/chime6/s5_track2/local/get_ref_perspeaker_persession_file.py
index 6b00e29e6b1..a4394984876 100755
--- a/egs/chime6/s5_track2/local/get_ref_perspeaker_persession_file.py
+++ b/egs/chime6/s5_track2/local/get_ref_perspeaker_persession_file.py
@@ -55,14 +55,21 @@ def main():
spkrid_mapping[sessionid_speakerid.split('_')[1]]) + '_comb'
combined_ref_writer = open(combined_ref_file, 'w')
utterances = sessionid_speakerid_dict[sessionid_speakerid]
- text = ''
- uttid_wc = 'utt'
+ sessionid_speakerid_utterances = {}
+ # sorting utterances by start and end time
for line in utterances:
parts = line.strip().split()
+ utt_parts = parts[0].strip().split('-')
+ time ='-'.join(utt_parts[1:])
+ sessionid_speakerid_utterances[time] = line
+ text = ''
+ uttid_wc = 'utt'
+ for time_key in sorted(sessionid_speakerid_utterances):
+ parts = sessionid_speakerid_utterances[time_key].strip().split()
uttid_id = parts[0]
utt_text = ' '.join(parts[1:])
text = text + ' ' + ' '.join(parts[1:])
- ref_writer.write(line)
+ ref_writer.write(sessionid_speakerid_utterances[time_key])
length = str(len(utt_text.split()))
uttid_id_len = uttid_id + ":" + length
uttid_wc = uttid_wc + ' ' + uttid_id_len
diff --git a/egs/chime6/s5_track2/local/install_dscore.sh b/egs/chime6/s5_track2/local/install_dscore.sh
new file mode 100755
index 00000000000..314f86f938e
--- /dev/null
+++ b/egs/chime6/s5_track2/local/install_dscore.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+# Installs dscore
+git clone https://github.com/nryant/dscore.git
+pip3 install intervaltree --user
+pip3 install tabulate --user
+pip3 install munkres --user
+pip3 install pytest --user
diff --git a/egs/chime6/s5_track2/local/multispeaker_score.sh b/egs/chime6/s5_track2/local/multispeaker_score.sh
index 74e089c4052..c7075d6cf14 100755
--- a/egs/chime6/s5_track2/local/multispeaker_score.sh
+++ b/egs/chime6/s5_track2/local/multispeaker_score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Ashish Arora, Yusuke Fujita
# Apache 2.0.
# This script takes a reference and hypothesis text file, and performs
diff --git a/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats.sh b/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats.sh
index cb8fe2e6326..6b5ccd466c3 100755
--- a/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats.sh
+++ b/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats_for_egs.sh
index dcdbe1b1593..326b6dbb9fa 100755
--- a/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats_for_egs.sh
+++ b/egs/chime6/s5_track2/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/chime6/s5_track2/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/chime6/s5_track2/local/nnet3/xvector/tuning/run_xvector_1a.sh
index 94fc7e7682f..2189e406a7e 100755
--- a/egs/chime6/s5_track2/local/nnet3/xvector/tuning/run_xvector_1a.sh
+++ b/egs/chime6/s5_track2/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 David Snyder
# 2018 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2018 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/chime6/s5_track2/local/prepare_data.sh b/egs/chime6/s5_track2/local/prepare_data.sh
index c6b8121dab0..8bd2530d6db 100755
--- a/egs/chime6/s5_track2/local/prepare_data.sh
+++ b/egs/chime6/s5_track2/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe, Yenda Trmal)
# Apache 2.0
diff --git a/egs/chime6/s5_track2/local/print_dset_error.py b/egs/chime6/s5_track2/local/print_dset_error.py
index 1a7fd4ff365..8ffe930f4f6 100755
--- a/egs/chime6/s5_track2/local/print_dset_error.py
+++ b/egs/chime6/s5_track2/local/print_dset_error.py
@@ -30,6 +30,6 @@
for arrayid in sorted(array_id_error_dict):
wer = float(array_id_error_dict[arrayid][1])/float(array_id_error_dict[arrayid][0])*100
- wer_detail = "%WER {0:5.2f} [ {1} / {2}, {3} ins, {4} del, {5} sub ]".format(wer, array_id_error_dict[arrayid][0], array_id_error_dict[arrayid][1], array_id_error_dict[arrayid][2], array_id_error_dict[arrayid][3], array_id_error_dict[arrayid][4])
+ wer_detail = "%WER {0:5.2f} [ {1} / {2}, {3} ins, {4} del, {5} sub ]".format(wer, array_id_error_dict[arrayid][1], array_id_error_dict[arrayid][0], array_id_error_dict[arrayid][2], array_id_error_dict[arrayid][3], array_id_error_dict[arrayid][4])
output.write(arrayid + ' ' + wer_detail + '\n')
diff --git a/egs/chime6/s5_track2/local/score_for_submit.sh b/egs/chime6/s5_track2/local/score_for_submit.sh
index 29dfac529b4..71a3a4dd607 100755
--- a/egs/chime6/s5_track2/local/score_for_submit.sh
+++ b/egs/chime6/s5_track2/local/score_for_submit.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Apache 2.0
#
# This script provides CHiME-6 challenge track 2 submission scores.
@@ -56,12 +56,19 @@ if [ $stage -le 2 ]; then
| utils/best_wer.sh >& $dev_decodedir/scoring_kaldi_multispeaker/best_wer
best_wer_file=$(awk '{print $NF}' $dev_decodedir/scoring_kaldi_multispeaker/best_wer)
+ best_array=$(echo $best_wer_file | awk -F: '{N=NF; print $N}')
best_lmwt=$(echo $best_wer_file | awk -F/ '{N=NF-2; print $N}')
best_wip=$(echo $best_wer_file | awk -F_ '{N=NF-3; print $N}' | awk -F/ '{N=NF-2; print $N}')
-fi
-echo "best LM weight: $best_lmwt"
-echo "best insertion penalty weight: $best_wip"
+ # printing and storing best lmwt, best_array and wip
+ echo "best array: $best_array"
+ echo "best LM weight: $best_lmwt"
+ echo "best insertion penalty weight: $best_wip"
+
+ echo $best_lmwt > $dev_decodedir/scoring_kaldi_multispeaker/lmwt
+ echo $best_wip > $dev_decodedir/scoring_kaldi_multispeaker/wip
+ echo $best_array > $dev_decodedir/scoring_kaldi_multispeaker/best_array
+fi
if [ $stage -le 3 ]; then
# obtaining per utterance stats for dev
@@ -80,11 +87,16 @@ if [ $stage -le 4 ]; then
fi
if [ $stage -le 5 ]; then
- # storing best lmwt and wip and printing best wer for dev and eval
- echo $best_lmwt > $dev_decodedir/scoring_kaldi_multispeaker/lmwt
- echo $best_wip > $dev_decodedir/scoring_kaldi_multispeaker/wip
+ # obtaining eval wer corresponding to best lmwt, best_array and wip of dev
+ best_array="$(cat $dev_decodedir/scoring_kaldi_multispeaker/best_array)"
+ best_lmwt="$(cat $dev_decodedir/scoring_kaldi_multispeaker/lmwt)"
+ best_wip="$(cat $dev_decodedir/scoring_kaldi_multispeaker/wip)"
+
+ grep WER $eval_decodedir/scoring_kaldi_multispeaker/penalty_$best_wip/$best_lmwt/per_speaker_wer/array_wer.txt /dev/null \
+ | grep $best_array | utils/best_wer.sh >& $eval_decodedir/scoring_kaldi_multispeaker/best_wer
- echo "$(<$dev_decodedir/scoring_kaldi_multispeaker/penalty_$best_wip/$best_lmwt/per_speaker_wer/array_wer.txt)"
- echo "$(<$eval_decodedir/scoring_kaldi_multispeaker/penalty_$best_wip/$best_lmwt/per_speaker_wer/array_wer.txt)"
+ # printing dev and eval wer
+ echo "Dev: $(<$dev_decodedir/scoring_kaldi_multispeaker/best_wer)" | cut -d " " -f 1-15
+ echo "Eval: $(<$eval_decodedir/scoring_kaldi_multispeaker/best_wer)" | cut -d " " -f 1-14
fi
diff --git a/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh b/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh
index 91d52b39269..c9719d472f3 100755
--- a/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh
+++ b/egs/chime6/s5_track2/local/segmentation/detect_speech_activity.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-17 Vimal Manohar
# 2017 Nagendra Kumar Goel
diff --git a/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh b/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh
index 5701424869a..7ea39f45639 100755
--- a/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh
+++ b/egs/chime6/s5_track2/local/segmentation/tuning/train_lstm_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2018 Vimal Manohar
diff --git a/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh b/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh
index bb985462f49..83bcd587d88 100755
--- a/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh
+++ b/egs/chime6/s5_track2/local/segmentation/tuning/train_stats_sad_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2018 Vimal Manohar
diff --git a/egs/chime6/s5_track2/local/train_diarizer.sh b/egs/chime6/s5_track2/local/train_diarizer.sh
index 71918e7cabc..845ac7840d5 100755
--- a/egs/chime6/s5_track2/local/train_diarizer.sh
+++ b/egs/chime6/s5_track2/local/train_diarizer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright
# 2019 David Snyder
# Apache 2.0.
diff --git a/egs/chime6/s5_track2/local/train_sad.sh b/egs/chime6/s5_track2/local/train_sad.sh
index e12a0cad694..cbaf3dfc5de 100755
--- a/egs/chime6/s5_track2/local/train_sad.sh
+++ b/egs/chime6/s5_track2/local/train_sad.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Nagendra Kumar Goel
# 2017 Vimal Manohar
diff --git a/egs/chime6/s5_track2/local/truncate_rttm.py b/egs/chime6/s5_track2/local/truncate_rttm.py
new file mode 100755
index 00000000000..3de0c0a60d6
--- /dev/null
+++ b/egs/chime6/s5_track2/local/truncate_rttm.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# Apache 2.0
+# This script truncates the rttm file
+# using UEM file and writes it to a new rttm file
+#
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+from scorelib.turn import trim_turns
+import scorelib.rttm as rttm_func
+from scorelib.uem import load_uem
+
+def get_args():
+ parser = argparse.ArgumentParser(
+ description="""This script truncates the rttm file
+ using UEM file""")
+ parser.add_argument("rttm_file", type=str,
+ help="""Input RTTM file.
+ The format of the RTTM file is
+ """
+ """ """)
+ parser.add_argument("uem_file", type=str,
+ help="""Input UEM file.
+ The format of the UEM file is
+ """)
+ parser.add_argument("rttm_file_write", type=str,
+ help="""output RTTM file.""")
+ args = parser.parse_args()
+ return args
+
+
+if __name__ == '__main__':
+ args = get_args()
+ rttm_writer = open(args.rttm_file_write, 'w')
+ turns, speaker_ids, file_ids = rttm_func.load_rttm(args.rttm_file)
+ loaded_uem = load_uem(args.uem_file)
+ truncated_turns = trim_turns(turns, loaded_uem)
+ rttm_func.write_rttm(args.rttm_file_write,truncated_turns)
diff --git a/egs/chime6/s5_track2/local/uem_file b/egs/chime6/s5_track2/local/uem_file
new file mode 100644
index 00000000000..c1d4dbcd5d4
--- /dev/null
+++ b/egs/chime6/s5_track2/local/uem_file
@@ -0,0 +1,20 @@
+S01_U01 1 0 12000
+S02_U01 1 75 12000
+S09_U01 1 64 12000
+S21_U01 1 59 12000
+S01_U02 1 0 12000
+S02_U02 1 75 12000
+S09_U02 1 64 12000
+S21_U02 1 59 12000
+S01_U03 1 0 12000
+S02_U03 1 75 12000
+S09_U03 1 64 12000
+S21_U03 1 59 12000
+S01_U04 1 0 12000
+S02_U04 1 75 12000
+S09_U04 1 64 12000
+S21_U04 1 59 12000
+S01_U06 1 0 12000
+S02_U06 1 75 12000
+S09_U06 1 64 12000
+S21_U06 1 59 12000
diff --git a/egs/chime6/s5_track2/path.sh b/egs/chime6/s5_track2/path.sh
index c2526194bee..2f4e4e4fb21 100644
--- a/egs/chime6/s5_track2/path.sh
+++ b/egs/chime6/s5_track2/path.sh
@@ -1,6 +1,8 @@
export KALDI_ROOT=`pwd`/../../..
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
+export PATH=$PWD/dscore:$PATH
+export PYTHONPATH="${PYTHONPATH}:$PWD/dscore"
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. $KALDI_ROOT/tools/config/common_path.sh
export LC_ALL=C
diff --git a/egs/chime6/s5_track2/run.sh b/egs/chime6/s5_track2/run.sh
index 1350b8e14d5..d5548518287 100755
--- a/egs/chime6/s5_track2/run.sh
+++ b/egs/chime6/s5_track2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Chime-6 Track 2 baseline. Based mostly on the Chime-5 recipe, with the exception
# that we are required to perform speech activity detection and speaker
@@ -16,7 +16,7 @@ stage=0
nnet_stage=-10
sad_stage=0
diarizer_stage=0
-decode_stage=1
+decode_stage=0
enhancement=beamformit # for a new enhancement method,
# change this variable and decode stage
decode_only=false
@@ -111,8 +111,12 @@ if [ $stage -le 4 ]; then
utils/copy_data_dir.sh data/train_worn data/train_worn_org # back up
grep -v -e "^P11_S03" -e "^P52_S19" -e "^P53_S24" -e "^P54_S24" data/train_worn_org/text > data/train_worn/text
utils/fix_data_dir.sh data/train_worn
-fi
+ # Remove S12_U05 from training data since it has known issues
+ utils/copy_data_dir.sh data/train_u05 data/train_u05_org # back up
+ grep -v -e "^S12_U05" data/train_u05_org/text > data/train_u05/text
+ utils/fix_data_dir.sh data/train_u05
+fi
#########################################################################################
# In stages 5 and 6, we augment and fix train data for our training purpose. point source
diff --git a/egs/cifar/v1/image/copy_data_dir.sh b/egs/cifar/v1/image/copy_data_dir.sh
index c923f5cc07a..dd9fdbc086a 100755
--- a/egs/cifar/v1/image/copy_data_dir.sh
+++ b/egs/cifar/v1/image/copy_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (author: Daniel Povey)
# Apache 2.0
diff --git a/egs/cifar/v1/image/fix_data_dir.sh b/egs/cifar/v1/image/fix_data_dir.sh
index b85623b6e85..20f3de5dec6 100755
--- a/egs/cifar/v1/image/fix_data_dir.sh
+++ b/egs/cifar/v1/image/fix_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script makes sure that only the segments present in
# all of "feats.scp", "images.scp" [if present], segments [if present]
diff --git a/egs/cifar/v1/image/validate_data_dir.sh b/egs/cifar/v1/image/validate_data_dir.sh
index e4db9c2c92c..bf56c17632a 100755
--- a/egs/cifar/v1/image/validate_data_dir.sh
+++ b/egs/cifar/v1/image/validate_data_dir.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
no_feats=false
diff --git a/egs/cifar/v1/local/nnet3/compare.sh b/egs/cifar/v1/local/nnet3/compare.sh
index c5208c38ac0..8524efe9f60 100755
--- a/egs/cifar/v1/local/nnet3/compare.sh
+++ b/egs/cifar/v1/local/nnet3/compare.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing trained models between systems.
# e.g. local/nnet3/compare.sh exp/resnet1{b,c}_cifar10
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1a.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1a.sh
index 3854bf24d82..94565e6588a 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1a.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# note: the final 'valid accuracy' (0.69) is actually the test accuracy.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1b.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1b.sh
index 907682454b9..6929eff139e 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1b.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1b is like 1a but a smaller model.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1c.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1c.sh
index 6cb94df49a0..c617145104e 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1c.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1c uses dropout with fewer but larger layers
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1d.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1d.sh
index 6baad31fcbb..864f2cda711 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1d.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1d is as 1c but adding batch-norm to all convolutional layers.
# batch-norm helps (0.78 -> 0.8).
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1e.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1e.sh
index a4dbc949d56..4c30016f656 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_1e.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1e is as 1d but making the time subsampling symmetric with the
# height subsampling (unfortunately this symmetry is not very visible
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1a.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1a.sh
index 1e3a6e10760..1f77a874ee7 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1a.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# aug_1a is as 1a but with data augmentation
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1b.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1b.sh
index 8e5f83ea2d5..2ae51a6320a 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1b.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_cnn_aug_1b is the same as run_cnn_1e but with data augmentation.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1c.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1c.sh
index 184ea0fa306..28e3006bded 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1c.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# aug_1c is the same as aug_1b but with many more epochs and smaller
# final learning rate
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1d.sh b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1d.sh
index 1eb448149ba..5c7a89113e6 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1d.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_cnn_aug_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1d is as 1c but setting num-minibatches-history=40.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1a.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1a.sh
index 8f41bb96c07..f5fc96a10f4 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1a.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_resnet_1a.sh is a quite well-performing resnet.
# It includes a form of shrinkage that approximates l2 regularization.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1b.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1b.sh
index f8f3b563e6c..3d4d4bfc3e3 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1b.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1b is as 1a but using more epochs: 100 instead of 60.
# This helps a bit.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1c.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1c.sh
index 0708b3d6eaa..34c487e4d00 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1c.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1c is as 1b but setting num-minibatches-history=40.0 in the configs,
# so the Fisher matrix estimates change less fast.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1d.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1d.sh
index 2d1ba279284..635dba70800 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1d.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1d is as 1c but adding rotation in image augmentation.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1e.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1e.sh
index 0b6bd5ce2a9..35615985621 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1e.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1e is as 1d but with more filters and epochs.
diff --git a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1f.sh b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1f.sh
index 3bb3316441a..5f9fe9d692a 100755
--- a/egs/cifar/v1/local/nnet3/tuning/run_resnet_1f.sh
+++ b/egs/cifar/v1/local/nnet3/tuning/run_resnet_1f.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1f is as 1e but with l2-regularize instead of proportional shrink
diff --git a/egs/cifar/v1/local/prepare_data.sh b/egs/cifar/v1/local/prepare_data.sh
index f73cbe41e3c..6eb44668217 100755
--- a/egs/cifar/v1/local/prepare_data.sh
+++ b/egs/cifar/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (author: Hossein Hadian)
# Apache 2.0
diff --git a/egs/cifar/v1/run.sh b/egs/cifar/v1/run.sh
index 084a8a53041..a180920ac4d 100755
--- a/egs/cifar/v1/run.sh
+++ b/egs/cifar/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/cmu_cslu_kids/README b/egs/cmu_cslu_kids/README
new file mode 100644
index 00000000000..0b8512e2487
--- /dev/null
+++ b/egs/cmu_cslu_kids/README
@@ -0,0 +1,21 @@
+This is an ASR recipe for children speech using cmu_kids and cslu_kids.
+Both of the corpora can be found on LDC:
+ - cmu_kids : https://catalog.ldc.upenn.edu/LDC97S63
+ - cslu_kids: https://catalog.ldc.upenn.edu/LDC2007S18
+
+To run this recipe, you'll need a copy of both corpora:
+ ./run.sh --cmu_kids --cslu_kids
+
+By default, this recipe will download an LM pretrained on LibriSpeech from
+lm_url=www.openslr.org/resources/11. If you already have a copy of this LM
+and do not wish to redownload, you can specify the LM path using the --lm_src option:
+ ./run.sh --cmu_kids --cslu_kids \
+ --lm_src
+
+This recipe will also download and clean CMU_Dict by default. If you have a clean copy
+already, or wish to use your own dictionary, simply copy your version of the dict to
+ data/local/dict
+
+To run extra features for triphone models or VTLN, set the following options true:
+ ./run.sh --cmu_kids --cslu_kids \
+ --vtln true --extra_features true
diff --git a/egs/cmu_cslu_kids/s5/cmd.sh b/egs/cmu_cslu_kids/s5/cmd.sh
new file mode 100644
index 00000000000..179307556d5
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/cmd.sh
@@ -0,0 +1,23 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd=queue.pl
+export decode_cmd="queue.pl --mem 2G"
+# the use of cuda_cmd is deprecated, used only in 'nnet1',
+export cuda_cmd="queue.pl --gpu 1"
+
+if [[ "$(hostname -f)" == "*.fit.vutbr.cz" ]]; then
+ queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+ export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+ export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+ export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
diff --git a/egs/cmu_cslu_kids/s5/conf/decode.config b/egs/cmu_cslu_kids/s5/conf/decode.config
new file mode 100644
index 00000000000..10b0eee900b
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/decode.config
@@ -0,0 +1,4 @@
+# Use wider-than-normal decoding beams for RM.
+first_beam=16.0
+beam=20.0
+lattice_beam=10.0
diff --git a/egs/cmu_cslu_kids/s5/conf/decode_dnn.config b/egs/cmu_cslu_kids/s5/conf/decode_dnn.config
new file mode 100644
index 00000000000..e7cfca74763
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/decode_dnn.config
@@ -0,0 +1,8 @@
+# In RM, the optimal decode LMWT is in range 2..5, which is different from usual 10..15
+# (it is caused by using simple rule-based LM, instead of n-gram LM),
+scoring_opts="--min-lmwt 2 --max-lmwt 10"
+# Still, it is better to use --acwt 0.1, both for decoding and sMBR,
+acwt=0.1
+# For this small task we can afford to have large beams,
+beam=30.0 # beam for decoding. Was 13.0 in the scripts.
+lattice_beam=18.0 # this has most effect on size of the lattices.
diff --git a/egs/cmu_cslu_kids/s5/conf/mfcc.conf b/egs/cmu_cslu_kids/s5/conf/mfcc.conf
new file mode 100644
index 00000000000..6bbcb763153
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/mfcc.conf
@@ -0,0 +1,2 @@
+--use-energy=false # only non-default option.
+--allow_downsample=true
diff --git a/egs/cmu_cslu_kids/s5/conf/mfcc_hires.conf b/egs/cmu_cslu_kids/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..40f95e97010
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/mfcc_hires.conf
@@ -0,0 +1,11 @@
+# config for high-resolution MFCC features, intended for neural network training
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
+ # there might be some information at the low end.
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
+--allow-downsample=true
diff --git a/egs/cmu_cslu_kids/s5/conf/online_cmvn.conf b/egs/cmu_cslu_kids/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/cmu_cslu_kids/s5/conf/plp.conf b/egs/cmu_cslu_kids/s5/conf/plp.conf
new file mode 100644
index 00000000000..e7e8a9e14af
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/conf/plp.conf
@@ -0,0 +1,2 @@
+# No non-default options for now.
+--allow_downsample=true
diff --git a/egs/cmu_cslu_kids/s5/local/chain/compare_wer.sh b/egs/cmu_cslu_kids/s5/local/chain/compare_wer.sh
new file mode 100755
index 00000000000..411d2691bb9
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/chain/compare_wer.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
+# For use with discriminatively trained systems you specify the epochs after a colon:
+# for instance,
+# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}
+
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: [--looped] [--online] [ ... ]"
+ echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
+ echo "or (with epoch numbers for discriminative training):"
+ echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
+ exit 1
+fi
+
+echo "# $0 $*"
+
+include_looped=false
+if [ "$1" == "--looped" ]; then
+ include_looped=true
+ shift
+fi
+include_online=false
+if [ "$1" == "--online" ]; then
+ include_online=true
+ shift
+fi
+
+
+used_epochs=false
+
+# this function set_names is used to separate the epoch-related parts of the name
+# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like:
+# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
+# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
+# If called with something like:
+# set_names exp/chain/tdnn_d_sp_smbr:3
+# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"
+
+
+set_names() {
+ if [ $# != 1 ]; then
+ echo "compare_wer_general.sh: internal error"
+ exit 1 # exit the program
+ fi
+ dirname=$(echo $1 | cut -d: -f1)
+ epoch=$(echo $1 | cut -s -d: -f2)
+ if [ -z $epoch ]; then
+ epoch_infix=""
+ else
+ used_epochs=true
+ epoch_infix=_epoch${epoch}
+ fi
+}
+
+
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+strings=(
+ "#WER dev_clean_2 (tgsmall) "
+ "#WER dev_clean_2 (tglarge) ")
+
+for n in 0 1; do
+ echo -n "${strings[$n]}"
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2)
+
+ wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ if $include_looped; then
+ echo -n "# [looped:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+ if $include_online; then
+ echo -n "# [online:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+done
+
+
+if $used_epochs; then
+ exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi
+
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (xent)"
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (xent)"
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Num-params "
+for x in $*; do
+ printf "% 10s" $(grep num-parameters $x/log/progress.1.log | awk '{print $2}')
+done
+echo
diff --git a/egs/cmu_cslu_kids/s5/local/chain/run_tdnnf.sh b/egs/cmu_cslu_kids/s5/local/chain/run_tdnnf.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/chain/run_tdnnf.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/cmu_cslu_kids/s5/local/chain/tdnnf_decode.sh b/egs/cmu_cslu_kids/s5/local/chain/tdnnf_decode.sh
new file mode 100755
index 00000000000..8d124193584
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/chain/tdnnf_decode.sh
@@ -0,0 +1,82 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Decode on new data set using trained model.
+# The data directory should be prepared in kaldi style.
+# Usage:
+# ./local/chain/tdnnF_decode.sh --data_src
+
+set -euo pipefail
+echo "$0 $@"
+
+stage=0
+decode_nj=10
+data_src=
+affix=
+tree_affix=
+nnet3_affix=
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat </dev/null || true
+
+ (
+ nspk=$(wc -l <$data_hires/spk2utt)
+ steps/nnet3/decode.sh \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nspk --cmd "$decode_cmd" --num-threads 4 \
+ --online-ivector-dir $ivect_dir \
+ $tree_dir/graph_tgsmall $data_hires ${dir}/decode_tgsmall_$data_name || exit 1
+
+ steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
+ data/lang_test_{tgsmall,tglarge} \
+ $data_hires ${dir}/decode_{tgsmall,tglarge}_$data_name || exit 1
+ ) || touch $dir/.error &
+
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
diff --git a/egs/cmu_cslu_kids/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/cmu_cslu_kids/s5/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..ca08fd4ada8
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,279 @@
+#!/usr/bin/env bash
+
+# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
+# 2017-2018 Yiming Wang
+# 2019 Fei Wu
+
+# Based on material recipe for low-resource languages
+# Factored TDNN with skip connectiong and splicing (two bottle neck layers)
+
+# WER results on dev
+# Model LM Corpus WER(%)
+# tdnn_1a tg_large Combined 11.72
+# tdnn_1a tg_small Combined 13.61
+# tdnn_1a tg_large CMU_Kids 17.26
+# tdnn_1a tg_small CMU_Kids 26.43
+# tdnn_1a tg_large CSLU_Kids 10.80
+# tdnn_1a tg_small CSLU_Kids 12.50
+
+# steps/info/chain_dir_info.pl exp/chain/tdnn1a_sp
+# exp/chain/tdnn1a_sp/: num-iters=342 nj=2..5 num-params=17.9M dim=40+100->3192 combine=-0.042->-0.041 (over 8) xent:train/valid[227,341,final]=(-0.451,-0.363,-0.346/-0.524,-0.466,-0.434) logprob:train/valid[227,341,final]=(-0.047,-0.043,-0.042/-0.058,-0.056,-0.054)
+
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=10
+train_set=train
+test_sets="test"
+gmm=tri3
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1a
+tree_affix=
+train_stage=-10
+get_egs_stage=-10
+decode_iter=
+
+# training chunk-options
+chunk_width=140,100,160
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+common_egs_dir=
+xent_regularize=0.1
+
+# training options
+srand=0
+remove_egs=true
+reporting_email=
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 8 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+
+if [ $stage -le 10 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those. The num-leaves is always somewhat less than the num-leaves from
+ # the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh \
+ --frame-subsampling-factor 3 \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" 3500 ${lores_train_data_dir} \
+ $lang $ali_dir $tree_dir
+fi
+
+if [ $stage -le 11 ]; then
+ mkdir -p $dir
+ echo "$0: creating neural net configs using the xconfig parser";
+
+ num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+ learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+ opts="l2-regularize=0.004 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ linear_opts="orthonormal-constraint=-1.0 l2-regularize=0.004"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-dropout-layer name=tdnn1 $opts dim=1024
+ linear-component name=tdnn2l0 dim=256 $linear_opts input=Append(-1,0)
+ linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0)
+ relu-batchnorm-dropout-layer name=tdnn2 $opts input=Append(0,1) dim=1024
+ linear-component name=tdnn3l dim=256 $linear_opts input=Append(-1,0)
+ relu-batchnorm-dropout-layer name=tdnn3 $opts dim=1024 input=Append(0,1)
+ linear-component name=tdnn4l0 dim=256 $linear_opts input=Append(-1,0)
+ linear-component name=tdnn4l dim=256 $linear_opts input=Append(0,1)
+ relu-batchnorm-dropout-layer name=tdnn4 $opts input=Append(0,1) dim=1024
+ linear-component name=tdnn5l dim=256 $linear_opts
+ relu-batchnorm-dropout-layer name=tdnn5 $opts dim=1024 input=Append(0, tdnn3l)
+ linear-component name=tdnn6l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0)
+ relu-batchnorm-dropout-layer name=tdnn6 $opts input=Append(0,3) dim=1280
+ linear-component name=tdnn7l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn7l dim=256 $linear_opts input=Append(0,3)
+ relu-batchnorm-dropout-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1024
+ linear-component name=tdnn8l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn8l dim=256 $linear_opts input=Append(0,3)
+ relu-batchnorm-dropout-layer name=tdnn8 $opts input=Append(0,3) dim=1280
+ linear-component name=tdnn9l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn9l dim=256 $linear_opts input=Append(-3,0)
+ relu-batchnorm-dropout-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn5l) dim=1024
+ linear-component name=tdnn10l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn10l dim=256 $linear_opts input=Append(0,3)
+ relu-batchnorm-dropout-layer name=tdnn10 $opts input=Append(0,3) dim=1280
+ linear-component name=tdnn11l0 dim=256 $linear_opts input=Append(-3,0)
+ linear-component name=tdnn11l dim=256 $linear_opts input=Append(-3,0)
+ relu-batchnorm-dropout-layer name=tdnn11 $opts input=Append(0,3,tdnn10l,tdnn9l,tdnn7l) dim=1024
+ linear-component name=prefinal-l dim=256 $linear_opts
+
+ relu-batchnorm-layer name=prefinal-chain input=prefinal-l $opts dim=1280
+ linear-component name=prefinal-chain-l dim=256 $linear_opts
+ batchnorm-component name=prefinal-chain-batchnorm
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ relu-batchnorm-layer name=prefinal-xent input=prefinal-l $opts dim=1280
+ linear-component name=prefinal-xent-l dim=256 $linear_opts
+ batchnorm-component name=prefinal-xent-batchnorm
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+
+EOF
+
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+
+fi
+
+
+if [ $stage -le 12 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir=$train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.lm-opts="--num-extra-lm-states=2000" \
+ --trainer.dropout-schedule $dropout_schedule \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=20 \
+ --trainer.frames-per-iter=3000000 \
+ --trainer.optimization.num-jobs-initial=2 \
+ --trainer.optimization.num-jobs-final=5 \
+ --trainer.optimization.initial-effective-lrate=0.002 \
+ --trainer.optimization.final-effective-lrate=0.0002 \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --egs.chunk-width=$chunk_width \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir=$train_data_dir \
+ --tree-dir=$tree_dir \
+ --lat-dir=$lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 13 ]; then
+ # Note: it's not important to give mkgraph.sh the lang directory with the
+ # matched topology (since it gets the topology file from the model).
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 data/lang_test_tgsmall \
+ $tree_dir $tree_dir/graph_tgsmall || exit 1;
+fi
+
+if [ $stage -le 14 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l tmp
+ cut -f 3- < tmp > out
+
+ tr '[:lower:]' '[:upper:]' < out > tmp
+ tr -d '[:cntrl:]' < tmp > out
+ sent=$( out
+ tr '[:lower:]' '[:upper:]' < tmp > out
+ trans=$(> $data/$target/utt2spk
+ echo "$uttID $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -f wav -p -c 1 $utt|" >> $data/$target/wav.scp
+ echo "$spkID f" >> $data/$target/spk2gender
+ echo "$uttID $sent" >> $data/$target/text
+ fi
+ done
+ fi
+ fi
+done
+
+for d in $data/train $data/test; do
+ utils/utt2spk_to_spk2utt.pl $d/utt2spk > $d/spk2utt
+ utils/fix_data_dir.sh $d
+done
+
+printf "\t total: %s; train: %s; test: %s.\n" "$total_cnt" "$train_cnt" "$test_cnt"
+rm -f out tmp
+
+# Optional
+# Get data duration, just for book keeping
+# for data in $data/train $data/test; do
+# ./local/data_duration.sh $data
+# done
+#
+
diff --git a/egs/cmu_cslu_kids/s5/local/cslu_aud_prep.sh b/egs/cmu_cslu_kids/s5/local/cslu_aud_prep.sh
new file mode 100755
index 00000000000..735f87eca9f
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/cslu_aud_prep.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Called by local/cslu_prepare_data.sh
+
+Assignment()
+{
+ rnd=$((1+RANDOM % 100))
+ if [ $rnd -le $test_percentage ]; then
+ target="test"
+ else
+ target="train"
+ fi
+}
+audio=
+test_percentage=30 # Percent of data reserved as test set
+debug=debug/cslu_dataprep_debug
+data=data/data_cslu
+. ./utils/parse_options.sh
+
+uttID=$(basename $audio)
+uttID=${uttID%'.wav'}
+sentID=${uttID: -3}
+spkID=${uttID%$sentID}
+sentID=${sentID%"0"}
+sentID=$(echo "$sentID" | tr '[:lower:]' '[:upper:]' )
+
+line=$(grep $sentID cslu/docs/all.map)
+
+if [ -z "$line" ]; then # Can't map utterance to transcript
+ echo $audio $sentID >> $debug
+else
+ txt=$(echo $line | grep -oP '"\K.*?(?=")')
+ cap_txt=${txt^^}
+ Assignment
+ echo "$uttID $cap_txt" >> $data/$target/text
+ echo "$uttID $spkID" >> $data/$target/utt2spk
+ echo "$spkID f" >> $data/$target/spk2gender
+ echo "$uttID $audio" >> $data/$target/wav.scp
+fi
+
diff --git a/egs/cmu_cslu_kids/s5/local/cslu_prepare_data.sh b/egs/cmu_cslu_kids/s5/local/cslu_prepare_data.sh
new file mode 100755
index 00000000000..621179079b3
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/cslu_prepare_data.sh
@@ -0,0 +1,49 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Prepares cslu_kids
+# Should be run from egs/cmu_cslu_kids
+
+set -e
+Looper()
+{
+ # echo "Looping through $1"
+ for f in $1/*; do
+ if [ -d $f ]; then
+ Looper $f
+ else
+ ./local/cslu_aud_prep.sh --data $data --audio $f
+ fi
+ done
+}
+
+data=data/data_cslu
+corpus=cslu
+. ./utils/parse_options.sh
+
+rm -f debug/cslu_dataprep_debug
+mkdir -p debug
+# File check, remove previous data and features files
+for d in $data/test $data/train; do
+ mkdir -p $d
+ ./local/file_check.sh $d
+done
+
+echo "Preparing cslu_kids..."
+Looper $corpus/speech/scripted
+
+for d in $data/test $data/train; do
+ ./utils/utt2spk_to_spk2utt.pl $d
+ ./utils/fix_data_dir.sh $d
+done
+if [ -f debug/cslu_dataprep_debug ]; then
+ echo "Missing transcripts for some utterances. See cslu_dataprep_debug"
+fi
+
+# Optional
+# Get data duration, just for book keeping
+# for data in data/data_cslu/test data/data_cslu/train; do
+# ./local/data_duration.sh $data
+# done
diff --git a/egs/cmu_cslu_kids/s5/local/data_duration.sh b/egs/cmu_cslu_kids/s5/local/data_duration.sh
new file mode 100755
index 00000000000..e838e365ea7
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/data_duration.sh
@@ -0,0 +1,19 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Get duration of the utterance given data dir
+set -eu
+echo $0 $@
+
+data_dir=$1
+mkdir -p duration
+
+./utils/data/get_utt2dur.sh $data_dir
+
+echo "$data_dir"
+python local/sum_duration.py $data_dir/utt2dur
+echo ""
+
+
diff --git a/egs/cmu_cslu_kids/s5/local/download_cmu_dict.sh b/egs/cmu_cslu_kids/s5/local/download_cmu_dict.sh
new file mode 100755
index 00000000000..3f58fd23c93
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/download_cmu_dict.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Copyright 2019 Fei Wu
+set -eu
+# Adapted from the local/prepare_dict script in
+# the librispeech recipe. Download and prepare CMU_dict.
+# For children's speech ASR tasks, since the vocabulary in cmu_kids and
+# cslu_kids is relatively easy compared to librispeech, we use only the
+# CMU_dict, and do not handle OOV with G2P.
+# Should be run from egs/cmu_cslu_kids.
+# Usage:
+# local/download_cmu_dict.sh --dict_dir
+
+dict_dir=data/local/dict
+OOV=""
+
+. ./utils/parse_options.sh || exit 1;
+. ./path.sh || exit 1
+
+if [ ! -d $dict_dir ]; then
+ echo "Downloading and preparing CMU dict"
+ svn co -r 12440 https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict $dict_dir/raw_dict || exit 1;
+
+ echo "Removing the pronunciation variant markers ..."
+ grep -v ';;;' $dict_dir/raw_dict/cmudict.0.7a | \
+ perl -ane 'if(!m:^;;;:){ s:(\S+)\(\d+\) :$1 :; print; }' | \
+ sort -u > $dict_dir/lexicon.txt || exit 1;
+
+ tr -d '\r' < $dict_dir/raw_dict/cmudict.0.7a.symbols > $dict_dir/nonsilence_phones.txt
+
+ echo "$OOV SIL" >> $dict_dir/lexicon.txt
+
+ echo "SIL" > $dict_dir/silence_phones.txt
+ echo "SPN" >> $dict_dir/silence_phones.txt
+ echo "SIL" > $dict_dir/optional_silence.txt
+
+ rm -rf $dict_dir/raw_dict
+fi
diff --git a/egs/cmu_cslu_kids/s5/local/download_lm.sh b/egs/cmu_cslu_kids/s5/local/download_lm.sh
new file mode 100755
index 00000000000..129ca1edbe3
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/download_lm.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 Vassil Panayotov
+# Apache 2.0
+
+if [ $# -ne "2" ]; then
+ echo "Usage: $0 "
+ echo "e.g.: $0 http://www.openslr.org/resources/11 data/local/lm"
+ exit 1
+fi
+
+base_url=$1
+dst_dir=$2
+
+# given a filename returns the corresponding file size in bytes
+# The switch cases below can be autogenerated by entering the data directory and running:
+# for f in *; do echo "\"$f\") echo \"$(du -b $f | awk '{print $1}')\";;"; done
+function filesize() {
+ case $1 in
+ "3-gram.arpa.gz") echo "759636181";;
+ "3-gram.pruned.1e-7.arpa.gz") echo "34094057";;
+ "3-gram.pruned.3e-7.arpa.gz") echo "13654242";;
+ "4-gram.arpa.gz") echo "1355172078";;
+ "g2p-model-5") echo "20098243";;
+ "librispeech-lexicon.txt") echo "5627653";;
+ "librispeech-lm-corpus.tgz") echo "1803499244";;
+ "librispeech-lm-norm.txt.gz") echo "1507274412";;
+ "librispeech-vocab.txt") echo "1737588";;
+ *) echo "";;
+ esac
+}
+
+function check_and_download () {
+ [[ $# -eq 1 ]] || { echo "check_and_download() expects exactly one argument!"; return 1; }
+ fname=$1
+ echo "Downloading file '$fname' into '$dst_dir'..."
+ expect_size="$(filesize $fname)"
+ [[ ! -z "$expect_size" ]] || { echo "Unknown file size for '$fname'"; return 1; }
+ if [[ -s $dst_dir/$fname ]]; then
+ # In the following statement, the first version works on linux, and the part
+ # after '||' works on macOS.
+ f=$dst_dir/$fname
+ fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f)
+ if [[ "$fsize" -eq "$expect_size" ]]; then
+ echo "'$fname' already exists and appears to be complete"
+ return 0
+ else
+ echo "WARNING: '$fname' exists, but the size is wrong - re-downloading ..."
+ fi
+ fi
+ wget --no-check-certificate -O $dst_dir/$fname $base_url/$fname || {
+ echo "Error while trying to download $fname!"
+ return 1
+ }
+ f=$dst_dir/$fname
+ # In the following statement, the first version works on linux, and the part after '||'
+ # works on macOS.
+ fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f)
+ [[ "$fsize" -eq "$expect_size" ]] || { echo "$fname: file size mismatch!"; return 1; }
+ return 0
+}
+
+mkdir -p $dst_dir
+
+for f in 3-gram.arpa.gz 3-gram.pruned.1e-7.arpa.gz 3-gram.pruned.3e-7.arpa.gz 4-gram.arpa.gz \
+ g2p-model-5 librispeech-lm-corpus.tgz librispeech-vocab.txt librispeech-lexicon.txt; do
+ check_and_download $f || exit 1
+done
+
+cd $dst_dir
+ln -sf 3-gram.pruned.1e-7.arpa.gz lm_tgmed.arpa.gz
+ln -sf 3-gram.pruned.3e-7.arpa.gz lm_tgsmall.arpa.gz
+ln -sf 3-gram.arpa.gz lm_tglarge.arpa.gz
+ln -sf 4-gram.arpa.gz lm_fglarge.arpa.gz
+
+exit 0
diff --git a/egs/cmu_cslu_kids/s5/local/file_check.sh b/egs/cmu_cslu_kids/s5/local/file_check.sh
new file mode 100755
index 00000000000..859f228058a
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/file_check.sh
@@ -0,0 +1,17 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+
+printf "\t File Check in folder: %s.\n" "$1"
+
+WavScp="$1/wav.scp"
+Text="$1/text"
+Utt2Spk="$1/utt2spk"
+Gend="$1/utt2gender"
+Spk2Utt="$1/spk2utt"
+rm -f $WavScp $Text $Utt2Spk $Gend $Spk2Utt
+
+
+
diff --git a/egs/cmu_cslu_kids/s5/local/format_lms.sh b/egs/cmu_cslu_kids/s5/local/format_lms.sh
new file mode 100755
index 00000000000..d1a18bada88
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/format_lms.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 Vassil Panayotov
+# Apache 2.0
+
+# Prepares the test time language model(G) transducers
+# (adapted from wsj/s5/local/wsj_format_data.sh)
+
+. ./path.sh || exit 1;
+
+# begin configuration section
+src_dir=data/lang
+# end configuration section
+
+. utils/parse_options.sh || exit 1;
+
+set -e
+
+if [ $# -ne 1 ]; then
+ echo "Usage: $0 "
+ echo "e.g.: $0 /export/a15/vpanayotov/data/lm"
+ echo ", where:"
+ echo " is the directory in which the language model is stored/downloaded"
+ echo "Options:"
+ echo " --src-dir # source lang directory, default data/lang"
+ exit 1
+fi
+
+lm_dir=$1
+
+if [ ! -d $lm_dir ]; then
+ echo "$0: expected source LM directory $lm_dir to exist"
+ exit 1;
+fi
+if [ ! -f $src_dir/words.txt ]; then
+ echo "$0: expected $src_dir/words.txt to exist."
+ exit 1;
+fi
+
+
+tmpdir=data/local/lm_tmp.$$
+trap "rm -r $tmpdir" EXIT
+
+mkdir -p $tmpdir
+
+for lm_suffix in tgsmall tgmed; do
+ # tglarge is prepared by a separate command, called from run.sh; we don't
+ # want to compile G.fst for tglarge, as it takes a while.
+ test=${src_dir}_test_${lm_suffix}
+ mkdir -p $test
+ cp -r ${src_dir}/* $test
+ gunzip -c $lm_dir/lm_${lm_suffix}.arpa.gz | \
+ arpa2fst --disambig-symbol=#0 \
+ --read-symbol-table=$test/words.txt - $test/G.fst
+ utils/validate_lang.pl --skip-determinization-check $test || exit 1;
+done
+
+echo "Succeeded in formatting data."
+
+exit 0
diff --git a/egs/cmu_cslu_kids/s5/local/make_lm.pl b/egs/cmu_cslu_kids/s5/local/make_lm.pl
new file mode 100755
index 00000000000..80eea5a6198
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/make_lm.pl
@@ -0,0 +1,119 @@
+#!/usr/bin/env perl
+
+# Copyright 2010-2011 Yanmin Qian Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This file takes as input the file wp_gram.txt that comes with the RM
+# distribution, and creates the language model as an acceptor in FST form.
+
+# make_rm_lm.pl wp_gram.txt > G.txt
+
+if (@ARGV != 1) {
+ print "usage: make_rm_lm.pl wp_gram.txt > G.txt\n";
+ exit(0);
+}
+unless (open(IN_FILE, "@ARGV[0]")) {
+ die ("can't open @ARGV[0]");
+}
+
+
+$flag = 0;
+$count_wrd = 0;
+$cnt_ends = 0;
+$init = "";
+
+while ($line = )
+{
+ chop($line); # Remove the trailing (newline) char
+
+ $line =~ s/ //g; # Delete all spaces
+
+ if(($line =~ /^>/)) # If line has ">"
+ {
+ if($flag == 0) # Flip flag
+ {
+ $flag = 1;
+ }
+ $line =~ s/>//g; # Delete ">"
+ $hashcnt{$init} = $i;
+ $init = $line;
+ $i = 0;
+ $count_wrd++;
+ @LineArray[$count_wrd - 1] = $init;
+ $hashwrd{$init} = 0;
+ }
+ elsif($flag != 0)
+ {
+
+ $hash{$init}[$i] = $line;
+ $i++;
+ if($line =~ /SENTENCE-END/)
+ {
+ $cnt_ends++;
+ }
+ }
+ else
+ {}
+}
+
+$hashcnt{$init} = $i;
+
+$num = 0;
+$weight = 0;
+$init_wrd = "SENTENCE-END";
+$hashwrd{$init_wrd} = @LineArray;
+for($i = 0; $i < $hashcnt{$init_wrd}; $i++)
+{
+ $weight = -log(1/$hashcnt{$init_wrd});
+ $hashwrd{$hash{$init_wrd}[$i]} = $i + 1;
+ print "0 $hashwrd{$hash{$init_wrd}[$i]} $hash{$init_wrd}[$i] $hash{$init_wrd}[$i] $weight\n";
+}
+$num = $i;
+
+for($i = 0; $i < @LineArray; $i++)
+{
+ if(@LineArray[$i] eq 'SENTENCE-END')
+ {}
+ else
+ {
+ if($hashwrd{@LineArray[$i]} == 0)
+ {
+ $num++;
+ $hashwrd{@LineArray[$i]} = $num;
+ }
+ for($j = 0; $j < $hashcnt{@LineArray[$i]}; $j++)
+ {
+ $weight = -log(1/$hashcnt{@LineArray[$i]});
+ if($hashwrd{$hash{@LineArray[$i]}[$j]} == 0)
+ {
+ $num++;
+ $hashwrd{$hash{@LineArray[$i]}[$j]} = $num;
+ }
+ if($hash{@LineArray[$i]}[$j] eq 'SENTENCE-END')
+ {
+ print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} $weight\n"
+ }
+ else
+ {
+ print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} $hash{@LineArray[$i]}[$j] $hash{@LineArray[$i]}[$j] $weight\n";
+ }
+ }
+ }
+}
+
+print "$hashwrd{$init_wrd} 0\n";
+close(IN_FILE);
+
+
diff --git a/egs/cmu_cslu_kids/s5/local/nnet3/compare_wer.sh b/egs/cmu_cslu_kids/s5/local/nnet3/compare_wer.sh
new file mode 100755
index 00000000000..4888de1f159
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/nnet3/compare_wer.sh
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
+# For use with discriminatively trained systems you specify the epochs after a colon:
+# for instance,
+# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}
+
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]"
+ echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
+ echo "or (with epoch numbers for discriminative training):"
+ echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
+ exit 1
+fi
+
+echo "# $0 $*"
+
+include_looped=false
+if [ "$1" == "--looped" ]; then
+ include_looped=true
+ shift
+fi
+include_online=false
+if [ "$1" == "--online" ]; then
+ include_online=true
+ shift
+fi
+
+
+used_epochs=false
+
+# this function set_names is used to separate the epoch-related parts of the name
+# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like:
+# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
+# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
+# If called with something like:
+# set_names exp/chain/tdnn_d_sp_smbr:3
+# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"
+
+
+set_names() {
+ if [ $# != 1 ]; then
+ echo "compare_wer_general.sh: internal error"
+ exit 1 # exit the program
+ fi
+ dirname=$(echo $1 | cut -d: -f1)
+ epoch=$(echo $1 | cut -s -d: -f2)
+ if [ -z $epoch ]; then
+ epoch_infix=""
+ else
+ used_epochs=true
+ epoch_infix=_epoch${epoch}
+ fi
+}
+
+
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+strings=(
+ "#WER dev_clean_2 (tgsmall) "
+ "#WER dev_clean_2 (tglarge) ")
+
+for n in 0 1; do
+ echo -n "${strings[$n]}"
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2)
+
+ wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ if $include_looped; then
+ echo -n "# [looped:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+ if $include_online; then
+ echo -n "# [online:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+done
+
+
+if $used_epochs; then
+ exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train acc "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid acc "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo
diff --git a/egs/cmu_cslu_kids/s5/local/nnet3/run_ivector_common.sh b/egs/cmu_cslu_kids/s5/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..4a7d3a8913a
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# This script is called from local/nnet3/run_tdnn.sh and
+# local/chain/run_tdnn.sh (and may eventually be called by more
+# scripts). It contains the common feature preparation and
+# iVector-related parts of the script. See those scripts for examples
+# of usage.
+
+stage=0
+train_set=train
+test_sets="test"
+gmm=tri3b
+
+nnet3_affix=
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+gmm_dir=exp/${gmm}
+ali_dir=exp/${gmm}_ali_${train_set}_sp
+
+for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do
+ if [ ! -f $f ]; then
+ echo "$0: expected file $f to exist"
+ exit 1
+ fi
+done
+
+if [ $stage -le 1 ]; then
+ # Although the nnet will be trained by high resolution data, we still have to
+ # perturb the normal data to get the alignment. _sp stands for speed-perturbed.
+ echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+ echo "$0: making MFCC features for low-resolution speed-perturbed data"
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/${train_set}_sp || exit 1;
+ steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1;
+ utils/fix_data_dir.sh data/${train_set}_sp
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
+ data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1
+fi
+
+if [ $stage -le 3 ]; then
+ # Create high-resolution MFCC features (with 40 cepstra instead of 13).
+ # this shows how you can split across multiple file-systems.
+ echo "$0: creating high-resolution MFCC features"
+ mfccdir=data/${train_set}_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/fs0{1,2}/$USER/kaldi-data/mfcc/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1;
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: computing a subset of data to train the diagonal UBM."
+ # We'll use about a quarter of the data.
+ mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
+ temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
+
+ num_utts_total=$(wc -l 2041 combine=-0.47->-0.38 loglike:train/valid[20,31,combined]=(-0.62,-0.38,-0.37/-1.03,-1.03,-1.02) accuracy:train/valid[20,31,combined]=(0.79,0.87,0.87/0.70,0.72,0.72)
+
+# Below, comparing with the chain TDNN system. It's a little better with the
+# small-vocab decoding. Both systems are probably super-badly tuned, and the
+# chain system probably used too many jobs.
+#
+# local/nnet3/compare_wer.sh exp/chain/tdnn1a_sp exp/nnet3/tdnn_lstm1a_sp
+# System tdnn1a_sp tdnn_lstm1a_sp
+#WER dev_clean_2 (tgsmall) 18.43 17.37
+#WER dev_clean_2 (tglarge) 13.15 13.43
+# Final train prob -0.3933
+# Final valid prob -0.9662
+# Final train acc 0.8652
+# Final valid acc 0.7206
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=10
+train_set=train_clean_5
+test_sets=dev_clean_2
+gmm=tri3b
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1a # affix for the TDNN directory name
+train_stage=-10
+get_egs_stage=-10
+decode_iter=
+
+# training options
+# training chunk-options
+chunk_width=40,30,20
+chunk_left_context=40
+chunk_right_context=0
+common_egs_dir=
+xent_regularize=0.1
+
+# training options
+srand=0
+remove_egs=true
+reporting_email=
+
+#decode options
+test_online_decoding=true # if true, it will run the last decoding stage.
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-renorm-layer name=tdnn1 dim=520
+ relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1)
+ fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3
+ relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3
+ relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3
+
+ output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 11 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_rnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir=$train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=6 \
+ --trainer.deriv-truncate-margin=10 \
+ --trainer.samples-per-iter=20000 \
+ --trainer.optimization.num-jobs-initial=1 \
+ --trainer.optimization.num-jobs-final=2 \
+ --trainer.optimization.initial-effective-lrate=0.0003 \
+ --trainer.optimization.final-effective-lrate=0.00003 \
+ --trainer.optimization.shrink-value=0.99 \
+ --trainer.rnn.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.5 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=$chunk_left_context \
+ --egs.chunk-right-context=$chunk_right_context \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir=$train_data_dir \
+ --ali-dir=$ali_dir \
+ --lang=$lang \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l /dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l 2041 combine=-0.71->-0.58 loglike:train/valid[20,31,combined]=(-2.78,-0.95,-0.57/-2.94,-1.31,-0.98) accuracy:train/valid[20,31,combined]=(0.48,0.75,0.81/0.45,0.67,0.71)
+
+# local/nnet3/compare_wer.sh --online exp/nnet3/tdnn_lstm1a_sp exp/nnet3/tdnn_lstm1b_sp
+# System tdnn_lstm1a_sp tdnn_lstm1b_sp
+#WER dev_clean_2 (tgsmall) 17.67 17.01
+# [online:] 18.06 17.26
+#WER dev_clean_2 (tglarge) 13.43 12.63
+# [online:] 13.73 12.94
+# Final train prob -0.3660 -0.5680
+# Final valid prob -1.0236 -0.9771
+# Final train acc 0.8737 0.8067
+# Final valid acc 0.7222 0.7144
+
+
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=10
+train_set=train_clean_5
+test_sets=dev_clean_2
+gmm=tri3b
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1b # affix for the TDNN+LSTM directory name
+train_stage=-10
+get_egs_stage=-10
+decode_iter=
+
+# training options
+# training chunk-options
+chunk_width=40,30,20
+chunk_left_context=40
+chunk_right_context=0
+common_egs_dir=
+xent_regularize=0.1
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+
+# training options
+srand=0
+remove_egs=true
+reporting_email=
+
+#decode options
+test_online_decoding=true # if true, it will run the last decoding stage.
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-renorm-layer name=tdnn1 dim=520
+ relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1)
+ fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+ relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+ relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+
+ output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 11 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_rnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir=$train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=6 \
+ --trainer.deriv-truncate-margin=10 \
+ --trainer.samples-per-iter=20000 \
+ --trainer.optimization.num-jobs-initial=1 \
+ --trainer.optimization.num-jobs-final=2 \
+ --trainer.optimization.initial-effective-lrate=0.0003 \
+ --trainer.optimization.final-effective-lrate=0.00003 \
+ --trainer.optimization.shrink-value=0.99 \
+ --trainer.dropout-schedule="$dropout_schedule" \
+ --trainer.rnn.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.5 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=$chunk_left_context \
+ --egs.chunk-right-context=$chunk_right_context \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir=$train_data_dir \
+ --ali-dir=$ali_dir \
+ --lang=$lang \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l /dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l 2041 combine=-0.99->-0.81 loglike:train/valid[20,31,combined]=(-1.22,-0.69,-0.61/-1.34,-1.02,-0.91) accuracy:train/valid[20,31,combined]=(0.68,0.779,0.800/0.64,0.70,0.724)
+
+
+
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=10
+train_set=train_clean_5
+test_sets=dev_clean_2
+gmm=tri3b
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1c # affix for the TDNN+LSTM directory name
+train_stage=-10
+get_egs_stage=-10
+decode_iter=
+
+# training options
+# training chunk-options
+chunk_width=40,30,20
+chunk_left_context=40
+chunk_right_context=0
+common_egs_dir=
+xent_regularize=0.1
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+
+# training options
+srand=0
+remove_egs=true
+reporting_email=
+
+#decode options
+test_online_decoding=true # if true, it will run the last decoding stage.
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-layer name=tdnn1 dim=520 $tdnn_opts
+ relu-batchnorm-layer name=tdnn2 dim=520 $tdnn_opts input=Append(-1,0,1)
+ fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+ relu-batchnorm-layer name=tdnn3 dim=520 $tdnn_opts input=Append(-3,0,3)
+ relu-batchnorm-layer name=tdnn4 dim=520 $tdnn_opts input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+ relu-batchnorm-layer name=tdnn5 dim=520 $tdnn_opts input=Append(-3,0,3)
+ relu-batchnorm-layer name=tdnn6 dim=520 $tdnn_opts input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts
+
+ output-layer name=output input=lstm3 $output_opts output-delay=$label_delay dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 11 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_rnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir=$train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=6 \
+ --trainer.deriv-truncate-margin=10 \
+ --trainer.samples-per-iter=20000 \
+ --trainer.optimization.num-jobs-initial=1 \
+ --trainer.optimization.num-jobs-final=2 \
+ --trainer.optimization.initial-effective-lrate=0.0003 \
+ --trainer.optimization.final-effective-lrate=0.00003 \
+ --trainer.dropout-schedule="$dropout_schedule" \
+ --trainer.rnn.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.5 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=$chunk_left_context \
+ --egs.chunk-right-context=$chunk_right_context \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir=$train_data_dir \
+ --ali-dir=$ali_dir \
+ --lang=$lang \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l /dev/null || true
+
+ for data in $test_sets; do
+ (
+ nspk=$(wc -l data/lang/G.fst || exit 1;
+
+# Checking that G is stochastic [note, it wouldn't be for an Arpa]
+fstisstochastic data/lang/G.fst || echo Error: G is not stochastic
+
+# Checking that G.fst is determinizable.
+fstdeterminize data/lang/G.fst /dev/null || echo Error determinizing G.
+
+# Checking that L_disambig.fst is determinizable.
+fstdeterminize data/lang/L_disambig.fst /dev/null || echo Error determinizing L.
+
+# Checking that disambiguated lexicon times G is determinizable
+fsttablecompose data/lang/L_disambig.fst data/lang/G.fst | \
+ fstdeterminize >/dev/null || echo Error
+
+# Checking that LG is stochastic:
+fsttablecompose data/lang/L.fst data/lang/G.fst | \
+ fstisstochastic || echo Error: LG is not stochastic.
+
+# Checking that L_disambig.G is stochastic:
+fsttablecompose data/lang/L_disambig.fst data/lang/G.fst | \
+ fstisstochastic || echo Error: LG is not stochastic.
+
+echo "Succeeded preparing grammar for CMU_kids."
diff --git a/egs/cmu_cslu_kids/s5/local/score.sh b/egs/cmu_cslu_kids/s5/local/score.sh
new file mode 100755
index 00000000000..cb5bbb7277b
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/score.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# 2014 Guoguo Chen
+# Apache 2.0
+
+[ -f ./path.sh ] && . ./path.sh
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+decode_mbr=true
+word_ins_penalty=0.0,0.5,1.0
+min_lmwt=7
+max_lmwt=17
+iter=final
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+ echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
+ echo " Options:"
+ echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
+ echo " --stage (0|1|2) # start scoring script from part-way through."
+ echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)."
+ echo " --min_lmwt # minumum LM-weight for lattice rescoring "
+ echo " --max_lmwt # maximum LM-weight for lattice rescoring "
+ exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $dir/lat.1.gz $data/text; do
+ [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
+done
+
+mkdir -p $dir/scoring/log
+
+cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
+
+for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \
+ lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+ lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
+ lattice-best-path --word-symbol-table=$symtab \
+ ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1;
+done
+
+# Note: the double level of quoting for the sed command
+for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \
+ cat $dir/scoring/LMWT.$wip.tra \| \
+ utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
+ compute-wer --text --mode=present \
+ ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1;
+done
+
+exit 0;
diff --git a/egs/cmu_cslu_kids/s5/local/sort_result.sh b/egs/cmu_cslu_kids/s5/local/sort_result.sh
new file mode 100755
index 00000000000..aedec9dc344
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/sort_result.sh
@@ -0,0 +1,46 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Sorts and reports results in results/results.txt
+# for all models in exp. Expects decode directories
+# to be named as exp/<mdl>/decode* or exp/chain/tdnn*/decode*
+# Should be run from egs/cmu_cslu_kids.
+
+res=${1:-"results/results.txt"}
+exp=exp
+mkdir -p results
+rm -f $res
+
+echo "Sorting results in: "
+echo "# ---------- GMM-HMM Models ----------" >> $res
+for mdl in $exp/mono* $exp/tri*; do
+ echo " $mdl"
+ if [ -d $mdl ];then
+ for dec in $mdl/decode*;do
+ echo " $dec"
+ if [ -d $dec ];then
+ grep WER $dec/wer* | \
+ sort -k2 -n > $dec/WERs
+ head -n 1 $dec/WERs >> $res
+ fi
+ done
+ fi
+done
+
+echo "# ---------- DNN-HMM Models ----------" >> $res
+# DNN results
+for mdl in $exp/chain/tdnn*; do
+ echo " $mdl"
+ for dec in $mdl/decode*; do
+ if [ -d $dec ]; then
+ echo " $dec"
+ grep WER $dec/wer* | \
+ sort -k2 -n > $dec/WERs
+ head -n 1 $dec/WERs >> $res
+ fi
+ done
+done
+
+sed -i "s/:/ /g" $res
diff --git a/egs/cmu_cslu_kids/s5/local/subset_dataset.sh b/egs/cmu_cslu_kids/s5/local/subset_dataset.sh
new file mode 100755
index 00000000000..f8936b64c97
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/subset_dataset.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Copyright 2017 Luminar Technologies, Inc. (author: Daniel Galvez)
+# Apache 2.0
+
+# The following commands were used to generate the mini_librispeech dataset:
+#
+# Note that data generation is random. This could be fixed by
+# providing a seed argument to the shuf program.
+
+if [ "$#" -ne 3 ]; then
+ echo "Usage: $0 <src-dir> <dest-dir> <dest-num-hours>"
+ echo "e.g.: $0 /export/a05/dgalvez/LibriSpeech/train-clean-100 \\
+ /export/a05/dgalvez/LibriSpeech/train-clean-5 5"
+ exit 1
+fi
+
+src_dir=$1
+dest_dir=$2
+dest_num_hours=$3
+
+src=$(basename $src_dir)
+dest=$(basename $dest_dir)
+librispeech_dir=$(dirname $src_dir)
+
+# TODO: Possibly improve this to ensure gender balance and speaker
+# balance.
+# TODO: Use actual time values instead of assuming that to make sure we get $dest_num_hours of data
+src_num_hours=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | awk -F'|' '{ print $3 }' | \
+python -c '
+from __future__ import print_function
+from sys import stdin
+minutes_str = stdin.read().split()
+print(int(round(sum([float(minutes) for minutes in minutes_str]) / 60.0)))')
+src_num_chapters=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | \
+ awk -F'|' '{ print $1 }' | sort -u | wc -l)
+mkdir -p data/subset_tmp
+grep "$src" $librispeech_dir/CHAPTERS.TXT | \
+ awk -F'|' '{ print $1 }' | \
+ shuf -n $(((dest_num_hours * src_num_chapters) / src_num_hours)) > \
+ data/subset_tmp/${dest}_chapter_id_list.txt
+
+while read -r chapter_id || [[ -n "$chapter_id" ]]; do
+ chapter_dir=$(find $src_dir/ -mindepth 2 -name "$chapter_id" -type d)
+ speaker_id=$(basename $(dirname $chapter_dir))
+ mkdir -p $dest_dir/$speaker_id/
+ cp -r $chapter_dir $dest_dir/$speaker_id/
+done < data/subset_tmp/${dest}_chapter_id_list.txt
diff --git a/egs/cmu_cslu_kids/s5/local/sum_duration.py b/egs/cmu_cslu_kids/s5/local/sum_duration.py
new file mode 100644
index 00000000000..0af7ba62151
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/sum_duration.py
@@ -0,0 +1,15 @@
+# Sum duration obtained by using
+# utils/data/get_utt2dur.sh
+
+import sys
+file = sys.argv[1]
+sum = 0
+with open(file, 'r') as fp:
+ line = fp.readline()
+ while(line):
+ toks = line.strip().split()
+ sum += float(toks[1])
+ line = fp.readline()
+fp.close()
+h=sum/3600
+sys.stdout.write("%f hour data.\n"%h)
diff --git a/egs/cmu_cslu_kids/s5/local/train_lms.sh b/egs/cmu_cslu_kids/s5/local/train_lms.sh
new file mode 100755
index 00000000000..a5aaf415e44
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/train_lms.sh
@@ -0,0 +1,217 @@
+#!/usr/bin/env bash
+
+# This script trains LMs on the WSJ LM-training data.
+# It requires that you have already run wsj_extend_dict.sh,
+# to get the larger-size dictionary including all of CMUdict
+# plus any OOVs and possible acronyms that we could easily
+# derive pronunciations for.
+
+dict_suffix=
+
+echo "$0 $@" # Print the command line for logging
+. utils/parse_options.sh || exit 1;
+
+dir=data/local/local_lm
+srcdir=data/local/dict${dict_suffix}_larger
+mkdir -p $dir
+. ./path.sh || exit 1; # for KALDI_ROOT
+export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH
+( # First make sure the kaldi_lm toolkit is installed.
+ cd $KALDI_ROOT/tools || exit 1;
+ if [ -d kaldi_lm ]; then
+ echo Not installing the kaldi_lm toolkit since it is already there.
+ else
+ echo Downloading and installing the kaldi_lm tools
+ if [ ! -f kaldi_lm.tar.gz ]; then
+ wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
+ fi
+ tar -xvzf kaldi_lm.tar.gz || exit 1;
+ cd kaldi_lm
+ make || exit 1;
+ echo Done making the kaldi_lm tools
+ fi
+) || exit 1;
+
+
+
+if [ ! -f $srcdir/cleaned.gz -o ! -f $srcdir/lexicon.txt ]; then
+ echo "Expecting files $srcdir/cleaned.gz and $srcdir/lexicon.txt to exist";
+ echo "You need to run local/wsj_extend_dict.sh before running this script."
+ exit 1;
+fi
+
+# Get a wordlist-- keep everything but silence, which should not appear in
+# the LM.
+awk '{print $1}' $srcdir/lexicon.txt | grep -v -w '!SIL' > $dir/wordlist.txt
+
+# Get training data with OOV words (w.r.t. our current vocab) replaced with <UNK>.
+echo "Getting training data with OOV words replaced with <UNK> (train_nounk.gz)"
+gunzip -c $srcdir/cleaned.gz | awk -v w=$dir/wordlist.txt \
+ 'BEGIN{while((getline<w)>0) v[$1]=1;}
+ {for (i=1;i<=NF;i++) if ($i in v) printf $i" ";else printf "<UNK> ";print ""}'|sed 's/ $//g' \
+ | gzip -c > $dir/train_nounk.gz
+
+# Get unigram counts (without bos/eos, but this doesn't matter here, it's
+# only to get the word-map, which treats them specially & doesn't need their
+# counts).
+# Add a 1-count for each word in word-list by including that in the data,
+# so all words appear.
+gunzip -c $dir/train_nounk.gz | cat - $dir/wordlist.txt | \
+ awk '{ for(x=1;x<=NF;x++) count[$x]++; } END{for(w in count){print count[w], w;}}' | \
+ sort -nr > $dir/unigram.counts
+
+# Get "mapped" words-- a character encoding of the words that makes the common words very short.
+cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "<s>" "</s>" "<UNK>" > $dir/word_map
+
+gunzip -c $dir/train_nounk.gz | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1]=$2;}
+ { for(n=1;n<=NF;n++) { printf map[$n]; if(n<NF){ printf " "; } else { print ""; }}}' | gzip -c >$dir/train.gz
+
+# To save disk space, remove the un-mapped training data. We could
+# easily generate it again if needed.
+rm $dir/train_nounk.gz
+
+train_lm.sh --arpa --lmtype 3gram-mincount $dir
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826
+# 7.8 million N-grams.
+
+prune_lm.sh --arpa 6.0 $dir/3gram-mincount/
+# 1.45 million N-grams.
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139
+
+train_lm.sh --arpa --lmtype 4gram-mincount $dir
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180
+# 10.3 million N-grams.
+
+prune_lm.sh --arpa 7.0 $dir/4gram-mincount
+# 1.50 million N-grams
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757
+
+
+exit 0
+
+### Below here, this script is showing various commands that
+## were run during LM tuning.
+
+train_lm.sh --arpa --lmtype 3gram-mincount $dir
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826
+# 7.8 million N-grams.
+
+prune_lm.sh --arpa 3.0 $dir/3gram-mincount/
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 156.408740
+# 2.5 million N-grams.
+
+prune_lm.sh --arpa 6.0 $dir/3gram-mincount/
+# 1.45 million N-grams.
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139
+
+train_lm.sh --arpa --lmtype 4gram-mincount $dir
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180
+# 10.3 million N-grams.
+
+prune_lm.sh --arpa 3.0 $dir/4gram-mincount
+#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 143.206294
+# 2.6 million N-grams.
+
+prune_lm.sh --arpa 4.0 $dir/4gram-mincount
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 146.927717
+# 2.15 million N-grams.
+
+prune_lm.sh --arpa 5.0 $dir/4gram-mincount
+# 1.86 million N-grams
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 150.162023
+
+prune_lm.sh --arpa 7.0 $dir/4gram-mincount
+# 1.50 million N-grams
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757
+
+train_lm.sh --arpa --lmtype 3gram $dir
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 135.692866
+# 20.0 million N-grams
+
+! which ngram-count \
+ && echo "SRILM tools not installed so not doing the comparison" && exit 1;
+
+#################
+# You could finish the script here if you wanted.
+# Below is to show how to do baselines with SRILM.
+# You'd have to install the SRILM toolkit first.
+
+heldout_sent=10000 # Don't change this if you want result to be comparable with
+ # kaldi_lm results
+sdir=$dir/srilm # in case we want to use SRILM to double-check perplexities.
+mkdir -p $sdir
+gunzip -c $srcdir/cleaned.gz | head -$heldout_sent > $sdir/cleaned.heldout
+gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent > $sdir/cleaned.train
+(echo "<s>"; echo "</s>" ) | cat - $dir/wordlist.txt > $sdir/wordlist.final.s
+
+# 3-gram:
+ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \
+ -map-unk "<unk>" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz
+ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/cleaned.heldout # consider -debug 2
+#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs
+#0 zeroprobs, logprob= -491456 ppl= 141.457 ppl1= 177.437
+
+# Trying 4-gram:
+ngram-count -text $sdir/cleaned.train -order 4 -limit-vocab -vocab $sdir/wordlist.final.s -unk \
+ -map-unk "<unk>" -kndiscount -interpolate -lm $sdir/srilm.o4g.kn.gz
+ngram -order 4 -lm $sdir/srilm.o4g.kn.gz -ppl $sdir/cleaned.heldout
+#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs
+#0 zeroprobs, logprob= -480939 ppl= 127.233 ppl1= 158.822
+
+#3-gram with pruning:
+ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \
+ -prune 0.0000001 -map-unk "<unk>" -kndiscount -interpolate -lm $sdir/srilm.o3g.pr7.kn.gz
+ngram -lm $sdir/srilm.o3g.pr7.kn.gz -ppl $sdir/cleaned.heldout
+#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs
+#0 zeroprobs, logprob= -510828 ppl= 171.947 ppl1= 217.616
+# Around 2.25M N-grams.
+# Note: this is closest to the experiment done with "prune_lm.sh --arpa 3.0 $dir/3gram-mincount/"
+# above, which gave 2.5 million N-grams and a perplexity of 156.
+
+# Note: all SRILM experiments above fully discount all singleton 3 and 4-grams.
+# You can use -gt3min=0 and -gt4min=0 to stop this (this will be comparable to
+# the kaldi_lm experiments above without "-mincount".
+
+## From here is how to train with
+# IRSTLM. This is not really working at the moment.
+
+if [ -z $IRSTLM ] ; then
+ export IRSTLM=$KALDI_ROOT/tools/irstlm/
+fi
+export PATH=${PATH}:$IRSTLM/bin
+if ! command -v prune-lm >/dev/null 2>&1 ; then
+ echo "$0: Error: the IRSTLM is not available or compiled" >&2
+ echo "$0: Error: We used to install it by default, but." >&2
+ echo "$0: Error: this is no longer the case." >&2
+ echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
+ echo "$0: Error: and run extras/install_irstlm.sh" >&2
+ exit 1
+fi
+
+idir=$dir/irstlm
+mkdir $idir
+gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent | add-start-end.sh | \
+ gzip -c > $idir/train.gz
+
+dict -i=WSJ.cleaned.irstlm.txt -o=dico -f=y -sort=no
+ cat dico | gawk 'BEGIN{while (getline<"vocab.20k.nooov") v[$1]=1; print "DICTIONARY 0 "length(v);}FNR>1{if ($1 in v)\
+{print $0;}}' > vocab.irstlm.20k
+
+
+build-lm.sh -i "gunzip -c $idir/train.gz" -o $idir/lm_3gram.gz -p yes \
+ -n 3 -s improved-kneser-ney -b yes
+# Testing perplexity with SRILM tools:
+ngram -lm $idir/lm_3gram.gz -ppl $sdir/cleaned.heldout
+#data/local/local_lm/irstlm/lm_3gram.gz: line 162049: warning: non-zero probability for <unk> in closed-vocabulary LM
+#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 0 OOVs
+#0 zeroprobs, logprob= -513670 ppl= 175.041 ppl1= 221.599
+
+# Perplexity is very bad (should be ~141, since we used -p option,
+# not 175),
+# but adding -debug 3 to the command line shows that
+# the IRSTLM LM does not seem to sum to one properly, so it seems that
+# it produces an LM that isn't interpretable in the normal way as an ARPA
+# LM.
+
+
+
diff --git a/egs/cmu_cslu_kids/s5/local/vtln.sh b/egs/cmu_cslu_kids/s5/local/vtln.sh
new file mode 100755
index 00000000000..0f3f0d375d3
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/local/vtln.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+# Run VTLN. This will be run if the vtln option
+# is set to be true in run.sh.
+
+set -eu
+stage=0
+featdir=mfcc/vtln
+data=data
+mdl=exp/tri3
+mdl_vtln=${mdl}_vtln
+vtln_lda=exp/tri4
+vtln_sat=exp/tri5
+
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+mkdir -p $featdir
+
+steps/train_lvtln.sh --cmd "$train_cmd" 1800 9000 $data/train $data/lang $mdl $mdl_vtln
+
+if [ $stage -le 0 ]; then
+ mkdir -p $data/train_vtln
+ cp $data/train/* $data/train_vtln || true
+ cp $mdl_vtln/final.warp $data/train_vtln/spk2warp
+ steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" $data/train_vtln exp/make_mfcc/train_vtln $featdir
+ steps/compute_cmvn_stats.sh $data/train_vtln exp/make_mfcc/train_vtln $featdir
+fi
+
+if [ $stage -le 1 ]; then
+ utils/mkgraph.sh $data/lang_test_tgmed $mdl_vtln $mdl_vtln/graph
+ steps/decode_lvtln.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
+ $mdl_vtln/graph $data/test $mdl_vtln/decode
+fi
+
+if [ $stage -le 2 ]; then
+ mkdir -p $data/test_vtln
+ cp $data/test/* $data/test_vtln || true
+ cp $mdl_vtln/decode/final.warp $data/test_vtln/spk2warp
+ steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" $data/test_vtln exp/make_mfcc/test_vtln $featdir
+ steps/compute_cmvn_stats.sh $data/test_vtln exp/make_mfcc/test_vtln $featdir
+fi
+
+if [ $stage -le 3 ]; then
+ steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" 1800 9000 \
+ $data/train_vtln $data/lang $mdl_vtln $vtln_lda
+ utils/mkgraph.sh $data/lang_test_tgmed $vtln_lda $vtln_lda/graph
+ echo "$mdl_vtln + lda + mllt" > $vtln_lda/model_discription
+ steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
+ $vtln_lda/graph $data/test_vtln $vtln_lda/decode
+fi
+
+if [ $stage -le 4 ]; then
+ steps/train_sat.sh 1800 9000 $data/train_vtln $data/lang $vtln_lda $vtln_sat
+ utils/mkgraph.sh $data/lang_test_tgmed $vtln_sat $vtln_sat/graph
+ steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" $vtln_sat/graph $data/test_vtln $vtln_sat/decode
+ echo "$mdl_vtln + lda + mllt + SAT" > $vtln_sat/model_discription
+fi
diff --git a/egs/cmu_cslu_kids/s5/path.sh b/egs/cmu_cslu_kids/s5/path.sh
new file mode 100755
index 00000000000..2d17b17a84a
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/cmu_cslu_kids/s5/run.sh b/egs/cmu_cslu_kids/s5/run.sh
new file mode 100755
index 00000000000..4226201989b
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/run.sh
@@ -0,0 +1,179 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+set -eo
+
+stage=0
+cmu_kids= # path to cmu_kids corpus
+cslu_kids= # path to cslu_kids corpus
+lm_src= # path of existing librispeech lm
+extra_features=false # Extra features for GMM model (MMI, boosting and MPE)
+vtln=false # Optional, run VLTN on gmm and tdnnf models if set true
+email= # Reporting email for tdnn-f training
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+lm_url=www.openslr.org/resources/11
+mkdir -p data
+mkdir -p data/local
+
+# Prepare data
+if [ $stage -le 0 ]; then
+ # Make soft link to the corpora
+ if [ ! -e cmu_kids ]; then
+ if [ ! -d $cmu_kids/kids ]; then echo "ERROR: Expected to find a directory called 'kids' in $cmu_kids. Exiting." && exit 1; fi
+ ln -sf $cmu_kids cmu_kids
+ fi
+ if [ ! -e cslu ]; then
+ if [ ! -d $cslu_kids/speech ]; then echo "ERROR: Expected to find a directory called 'speech' in $cslu_kids. Exiting." && exit 1; fi
+ ln -sf $cslu_kids cslu
+ fi
+
+ # Make softlink to lm, if lm_src provided
+ if [ ! -z "$lm_src" ] && [ ! -e data/local/lm ] ; then
+ ln -sf $lm_src data/local/lm
+ fi
+
+ # Remove old data dirs
+ rm -rf data/data_cmu
+ rm -rf data/data_cslu
+
+ # Data Prep
+ ./local/cmu_prepare_data.sh --corpus cmu_kids/kids --data data/data_cmu
+ ./local/cslu_prepare_data.sh --corpus cslu --data data/data_cslu
+fi
+
+# Combine data
+if [ $stage -le 1 ]; then
+ mkdir -p data/train
+ mkdir -p data/test
+ rm -rf data/train/*
+ rm -rf data/test/*
+ ./utils/combine_data.sh data/train data/data_cmu/train data/data_cslu/train
+ ./utils/combine_data.sh data/test data/data_cmu/test data/data_cslu/test
+fi
+
+# LM, WFST Preparation
+if [ $stage -le 2 ]; then
+ if [ ! -d data/local/dict ]; then
+ ./local/download_cmu_dict.sh
+ fi
+
+ if [ ! -e data/local/lm ]; then
+ echo "lm_src not provided. Downloading lm from openslr."
+ ./local/download_lm.sh $lm_url data/local/lm
+ fi
+
+ utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang
+ local/format_lms.sh --src_dir data/lang data/local/lm
+
+ # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs
+ utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge
+ utils/build_const_arpa_lm.sh data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge
+fi
+
+# Make MFCC features
+if [ $stage -le 3 ]; then
+ mkdir -p mfcc
+ mkdir -p exp
+ steps/make_mfcc.sh --nj 40 --cmd "$train_cmd" data/test exp/make_feat/test mfcc
+ steps/compute_cmvn_stats.sh data/test exp/make_feat/test mfcc
+ steps/make_mfcc.sh --nj 40 --cmd "$train_cmd" data/train exp/make_feat/train mfcc
+ steps/compute_cmvn_stats.sh data/train exp/make_feat/train mfcc
+fi
+
+# Mono-phone
+if [ $stage -le 4 ]; then
+ # Train
+ steps/train_mono.sh --nj 40 --cmd "$train_cmd" data/train data/lang exp/mono
+ #Decode
+ utils/mkgraph.sh data/lang_test_tgsmall exp/mono exp/mono/graph
+ steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/mono/graph data/test exp/mono/decode
+ #Align
+ steps/align_si.sh --nj 20 --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_ali
+fi
+
+# Tri1 [Vanilla tri phone model]
+if [ $stage -le 5 ]; then
+ # Train
+ steps/train_deltas.sh --cmd "$train_cmd" 1800 9000 data/train data/lang exp/mono_ali exp/tri1
+ # Decode
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri1 exp/tri1/graph
+ steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri1/graph data/test exp/tri1/decode
+ # Align - make graph - decode again
+ steps/align_si.sh --nj 20 --cmd "$train_cmd" --use-graphs true data/train data/lang_test_tgmed exp/tri1 exp/tri1_ali
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri1_ali exp/tri1_ali/graph
+ steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri1_ali/graph data/test exp/tri1_ali/decode
+fi
+
+# Add LDA and MLLT
+if [ $stage -le 6 ]; then
+ # Train
+ steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" 1800 9000 data/train data/lang exp/tri1_ali exp/tri2
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri2 exp/tri2/graph
+ # Decode
+ steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2/decode
+ # Align - make graph - decode again
+ steps/align_si.sh --nj 20 --cmd "$train_cmd" --use-graphs true data/train data/lang_test_tgmed exp/tri2 exp/tri2_ali
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri2_ali exp/tri2_ali/graph
+ steps/decode_fmllr.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri2_ali/graph data/test exp/tri2_ali/decode
+fi
+
+# Add other features
+if [ $stage -le 7 ]; then
+ if [ $extra_features = true ]; then
+ # Add MMI
+ steps/make_denlats.sh --nj 20 --cmd "$train_cmd" data/train data/lang exp/tri2 exp/tri2_denlats
+ steps/train_mmi.sh data/train data/lang exp/tri2_ali exp/tri2_denlats exp/tri2_mmi
+ steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi/decode_it4
+ steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi/decode_it3
+
+ # Add Boosting
+ steps/train_mmi.sh --boost 0.05 data/train data/lang exp/tri2_ali exp/tri2_denlats exp/tri2_mmi_b0.05
+ steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi_b0.05/decode_it4
+ steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi_b0.05/decode_it3
+
+ # Add MPE
+ steps/train_mpe.sh data/train data/lang exp/tri2_ali exp/tri2_denlats exp/tri2_mpe
+ steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mpe/decode_it4
+ steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mpe/decode_it3
+ fi
+fi
+
+# Add SAT
+if [ $stage -le 8 ]; then
+ # Do LDA+MLLT+SAT, and decode.
+ steps/train_sat.sh 1800 9000 data/train data/lang exp/tri2_ali exp/tri3
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri3 exp/tri3/graph
+ steps/decode_fmllr.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri3/graph data/test exp/tri3/decode
+fi
+
+if [ $stage -le 9 ]; then
+ # Align all data with LDA+MLLT+SAT system (tri3)
+ steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" --use-graphs true data/train data/lang_test_tgmed exp/tri3 exp/tri3_ali
+ utils/mkgraph.sh data/lang_test_tgmed exp/tri3_ali exp/tri3_ali/graph
+ steps/decode_fmllr.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri3_ali/graph data/test exp/tri3_ali/decode
+fi
+
+if [ $stage -le 10 ]; then
+ # Uncomment reporting email option to get training progress updates by email
+ ./local/chain/run_tdnnf.sh --train_set train \
+ --test_sets test --gmm tri3 # --reporting_email $email
+fi
+
+
+# Optional VTLN. Run if vtln is set to true
+if [ $stage -le 11 ]; then
+ if [ $vtln = true ]; then
+ ./local/vtln.sh
+ ./local/chain/run_tdnnf.sh --nnet3_affix vtln --train_set train_vtln \
+ --test_sets test_vtln --gmm tri5 # --reporting_email $email
+ fi
+fi
+
+# Collect and report WER results for all models
+./local/sort_result.sh
diff --git a/egs/cmu_cslu_kids/s5/steps b/egs/cmu_cslu_kids/s5/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/cmu_cslu_kids/s5/utils b/egs/cmu_cslu_kids/s5/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file
diff --git a/egs/cnceleb/README.txt b/egs/cnceleb/README.txt
new file mode 100644
index 00000000000..db8789839a9
--- /dev/null
+++ b/egs/cnceleb/README.txt
@@ -0,0 +1,9 @@
+
+This directory contains example scripts for CN-Celeb speaker
+verification. The CN-Celeb corpus is required, and can be
+downloaded from Openslr http://www.openslr.org/82/ or from
+CSLT@Tsinghua http://cslt.riit.tsinghua.edu.cn/~data/CN-Celeb/
+
+The subdirectories "v1" and so on are different speaker recognition
+recipes. The recipe in v1 demonstrates a standard approach using a
+full-covariance GMM-UBM, iVectors, and a PLDA backend.
diff --git a/egs/cnceleb/v1/README.txt b/egs/cnceleb/v1/README.txt
new file mode 100644
index 00000000000..dc5086f0b7a
--- /dev/null
+++ b/egs/cnceleb/v1/README.txt
@@ -0,0 +1,4 @@
+
+ This example demonstrates a traditional iVector system based on
+ CN-Celeb dataset.
+
diff --git a/egs/cnceleb/v1/cmd.sh b/egs/cnceleb/v1/cmd.sh
new file mode 100755
index 00000000000..d1ca1a6d126
--- /dev/null
+++ b/egs/cnceleb/v1/cmd.sh
@@ -0,0 +1,15 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl --mem 4G"
+
+
diff --git a/egs/cnceleb/v1/conf/mfcc.conf b/egs/cnceleb/v1/conf/mfcc.conf
new file mode 100644
index 00000000000..649cffb9de8
--- /dev/null
+++ b/egs/cnceleb/v1/conf/mfcc.conf
@@ -0,0 +1,7 @@
+--sample-frequency=16000
+--frame-length=25 # the default is 25
+--low-freq=20 # the default.
+--high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case).
+--num-mel-bins=30
+--num-ceps=24
+--snip-edges=false
diff --git a/egs/cnceleb/v1/conf/vad.conf b/egs/cnceleb/v1/conf/vad.conf
new file mode 100644
index 00000000000..a0ca2449b10
--- /dev/null
+++ b/egs/cnceleb/v1/conf/vad.conf
@@ -0,0 +1,2 @@
+--vad-energy-threshold=5.5
+--vad-energy-mean-scale=0.5
diff --git a/egs/cnceleb/v1/local/make_cnceleb.sh b/egs/cnceleb/v1/local/make_cnceleb.sh
new file mode 100755
index 00000000000..620c0dfe76a
--- /dev/null
+++ b/egs/cnceleb/v1/local/make_cnceleb.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# Copyright 2017 Ignacio Viñals
+# 2017-2018 David Snyder
+# 2019 Jiawen Kang
+#
+# This script prepares the CN-Celeb dataset. It creates separate directories
+# for train, eval enroll and eval test. It also prepares a trials files, in the eval test directory.
+
+if [ $# != 2 ]; then
+ echo "Usage: make_cnceleb.sh <CN-Celeb_PATH> <out_dir>"
+ echo "E.g.: make_cnceleb.sh /export/corpora/CN-Celeb data"
+ exit 1
+fi
+
+in_dir=$1
+out_dir=$2
+
+# Prepare the development data
+this_out_dir=${out_dir}/train
+mkdir -p $this_out_dir 2>/dev/null
+WAVFILE=$this_out_dir/wav.scp
+SPKFILE=$this_out_dir/utt2spk
+rm $WAVFILE $SPKFILE 2>/dev/null
+this_in_dir=${in_dir}/dev
+
+for spkr_id in `cat $this_in_dir/dev.lst`; do
+ for f in $in_dir/data/$spkr_id/*.wav; do
+ wav_id=$(basename $f | sed s:.wav$::)
+ echo "${spkr_id}-${wav_id} $f" >> $WAVFILE
+ echo "${spkr_id}-${wav_id} ${spkr_id}" >> $SPKFILE
+ done
+done
+utils/fix_data_dir.sh $this_out_dir
+
+# Prepare the evaluation data
+for mode in enroll test; do
+ this_out_dir=${out_dir}/eval_${mode}
+ mkdir -p $this_out_dir 2>/dev/null
+ WAVFILE=$this_out_dir/wav.scp
+ SPKFILE=$this_out_dir/utt2spk
+ rm $WAVFILE $SPKFILE 2>/dev/null
+ this_in_dir=${in_dir}/eval/${mode}
+
+ for f in $this_in_dir/*.wav; do
+ wav_id=$(basename $f | sed s:.wav$::)
+ spkr_id=$(echo ${wav_id} | cut -d "-" -f1)
+ echo "${wav_id} $f" >> $WAVFILE
+ echo "${wav_id} ${spkr_id}" >> $SPKFILE
+ done
+ utils/fix_data_dir.sh $this_out_dir
+done
+
+# Prepare test trials
+this_out_dir=$out_dir/eval_test/trials
+mkdir -p $out_dir/eval_test/trials
+this_in_dir=${in_dir}/eval/lists
+cat $this_in_dir/trials.lst | sed 's@-enroll@@g' | sed 's@test/@@g' | sed 's@.wav@@g' | \
+ awk '{if ($3 == "1")
+ {print $1,$2,"target"}
+ else
+ {print $1,$2,"nontarget"}
+ }'> $this_out_dir/trials.lst
+
diff --git a/egs/cnceleb/v1/path.sh b/egs/cnceleb/v1/path.sh
new file mode 100755
index 00000000000..e50f57c5271
--- /dev/null
+++ b/egs/cnceleb/v1/path.sh
@@ -0,0 +1,5 @@
+export KALDI_ROOT=`pwd`/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/cnceleb/v1/run.sh b/egs/cnceleb/v1/run.sh
new file mode 100755
index 00000000000..0afeddb8ffd
--- /dev/null
+++ b/egs/cnceleb/v1/run.sh
@@ -0,0 +1,133 @@
+#!/usr/bin/env bash
+# Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
+# 2017 Johns Hopkins University (Author: Daniel Povey)
+# 2017-2018 David Snyder
+# 2018 Ewald Enzinger
+# 2019 Tsinghua University (Author: Jiawen Kang and Lantian Li)
+# Apache 2.0.
+#
+# This is an i-vector-based recipe for CN-Celeb database.
+# See ../README.txt for more info on data required. The recipe uses
+# CN-Celeb/dev for training the UBM, T matrix and PLDA, and CN-Celeb/eval
+# for evaluation. The results are reported in terms of EER and minDCF,
+# and are inline in the comments below.
+
+. ./cmd.sh
+. ./path.sh
+set -e
+mfccdir=`pwd`/mfcc
+vaddir=`pwd`/mfcc
+
+cnceleb_root=/export/corpora/CN-Celeb
+eval_trails_core=data/eval_test/trials/trials.lst
+
+stage=0
+
+if [ $stage -le 0 ]; then
+ # Prepare the CN-Celeb dataset. The script is used to prepare the development
+ # dataset and evaluation dataset.
+ local/make_cnceleb.sh $cnceleb_root data
+fi
+
+if [ $stage -le 1 ]; then
+ # Make MFCCs and compute the energy-based VAD for each dataset
+ for name in train eval_enroll eval_test; do
+ steps/make_mfcc.sh --write-utt2num-frames true --mfcc-config conf/mfcc.conf --nj 20 --cmd "$train_cmd" \
+ data/${name} exp/make_mfcc $mfccdir
+ utils/fix_data_dir.sh data/${name}
+ sid/compute_vad_decision.sh --nj 20 --cmd "$train_cmd" \
+ data/${name} exp/make_vad $vaddir
+ utils/fix_data_dir.sh data/${name}
+ done
+fi
+
+if [ $stage -le 2 ]; then
+ # Train the UBM
+ sid/train_diag_ubm.sh --cmd "$train_cmd --mem 4G" \
+ --nj 20 --num-threads 8 \
+ data/train 2048 \
+ exp/diag_ubm
+
+ sid/train_full_ubm.sh --cmd "$train_cmd --mem 16G" \
+ --nj 20 --remove-low-count-gaussians false \
+ data/train \
+ exp/diag_ubm exp/full_ubm
+fi
+
+if [ $stage -le 3 ]; then
+ # Train the i-vector extractor.
+ sid/train_ivector_extractor.sh --nj 20 --cmd "$train_cmd --mem 16G" \
+ --ivector-dim 400 --num-iters 5 \
+ exp/full_ubm/final.ubm data/train \
+ exp/extractor
+fi
+
+if [ $stage -le 4 ]; then
+ # Note that there are over one-third of the utterances less than 2 seconds in our training set,
+ # and these short utterances are harmful for PLDA training. Therefore, to improve performance
+ # of PLDA modeling and inference, we will combine the short utterances longer than 5 seconds.
+ utils/data/combine_short_segments.sh --speaker-only true \
+ data/train 5 data/train_comb
+ # Compute the energy-based VAD for train_comb
+ sid/compute_vad_decision.sh --nj 20 --cmd "$train_cmd" \
+ data/train_comb exp/make_vad $vaddir
+ utils/fix_data_dir.sh data/train_comb
+fi
+
+if [ $stage -le 5 ]; then
+ # These i-vectors will be used for mean-subtraction, LDA, and PLDA training.
+ sid/extract_ivectors.sh --cmd "$train_cmd --mem 4G" --nj 20 \
+ exp/extractor data/train_comb \
+ exp/ivectors_train_comb
+
+ # Extract i-vector for eval sets.
+ for name in eval_enroll eval_test; do
+ sid/extract_ivectors.sh --cmd "$train_cmd --mem 4G" --nj 10 \
+ exp/extractor data/$name \
+ exp/ivectors_$name
+ done
+fi
+
+if [ $stage -le 6 ]; then
+ # Compute the mean vector for centering the evaluation i-vectors.
+ $train_cmd exp/ivectors_train_comb/log/compute_mean.log \
+ ivector-mean scp:exp/ivectors_train_comb/ivector.scp \
+ exp/ivectors_train_comb/mean.vec || exit 1;
+
+ # This script uses LDA to decrease the dimensionality prior to PLDA.
+ lda_dim=150
+ $train_cmd exp/ivectors_train_comb/log/lda.log \
+ ivector-compute-lda --total-covariance-factor=0.0 --dim=$lda_dim \
+ "ark:ivector-subtract-global-mean scp:exp/ivectors_train_comb/ivector.scp ark:- |" \
+ ark:data/train_comb/utt2spk exp/ivectors_train_comb/transform.mat || exit 1;
+
+ # Train the PLDA model.
+ $train_cmd exp/ivectors_train_comb/log/plda.log \
+ ivector-compute-plda ark:data/train_comb/spk2utt \
+ "ark:ivector-subtract-global-mean scp:exp/ivectors_train_comb/ivector.scp ark:- | transform-vec exp/ivectors_train_comb/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
+ exp/ivectors_train_comb/plda || exit 1;
+
+fi
+
+if [ $stage -le 7 ]; then
+ # Compute PLDA scores for CN-Celeb eval core trials
+ $train_cmd exp/scores/log/cnceleb_eval_scoring.log \
+ ivector-plda-scoring --normalize-length=true \
+ --num-utts=ark:exp/ivectors_eval_enroll/num_utts.ark \
+ "ivector-copy-plda --smoothing=0.0 exp/ivectors_train_comb/plda - |" \
+ "ark:ivector-mean ark:data/eval_enroll/spk2utt scp:exp/ivectors_eval_enroll/ivector.scp ark:- | ivector-subtract-global-mean exp/ivectors_train_comb/mean.vec ark:- ark:- | transform-vec exp/ivectors_train_comb/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
+ "ark:ivector-subtract-global-mean exp/ivectors_train_comb/mean.vec scp:exp/ivectors_eval_test/ivector.scp ark:- | transform-vec exp/ivectors_train_comb/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
+ "cat '$eval_trails_core' | cut -d\ --fields=1,2 |" exp/scores/cnceleb_eval_scores || exit 1;
+
+ # CN-Celeb Eval Core:
+ # EER: 13.91%
+ # minDCF(p-target=0.01): 0.6530
+ # minDCF(p-target=0.001): 0.7521
+ echo -e "\nCN-Celeb Eval Core:";
+ eer=$(paste $eval_trails_core exp/scores/cnceleb_eval_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
+ mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/cnceleb_eval_scores $eval_trails_core 2> /dev/null`
+ mindcf2=`sid/compute_min_dcf.py --p-target 0.001 exp/scores/cnceleb_eval_scores $eval_trails_core 2> /dev/null`
+ echo "EER: $eer%"
+ echo "minDCF(p-target=0.01): $mindcf1"
+ echo "minDCF(p-target=0.001): $mindcf2"
+fi
diff --git a/egs/cnceleb/v1/sid b/egs/cnceleb/v1/sid
new file mode 120000
index 00000000000..893a12f30c9
--- /dev/null
+++ b/egs/cnceleb/v1/sid
@@ -0,0 +1 @@
+../../sre08/v1/sid
\ No newline at end of file
diff --git a/egs/cnceleb/v1/steps b/egs/cnceleb/v1/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/cnceleb/v1/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/cnceleb/v1/utils b/egs/cnceleb/v1/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/cnceleb/v1/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/commonvoice/s5/local/chain/compare_wer.sh b/egs/commonvoice/s5/local/chain/compare_wer.sh
index 133b6b5d250..217ec057a1a 100755
--- a/egs/commonvoice/s5/local/chain/compare_wer.sh
+++ b/egs/commonvoice/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copied from egs/mini_librispeech/s5/local/chain/compare_wer.sh (commit 87d95c5efff7da3b6f04e719a96de4204a367f8b)
diff --git a/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh
index d4acd0fed4b..68cb129d9ed 100755
--- a/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Adapted from egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh
diff --git a/egs/commonvoice/s5/local/download_and_untar.sh b/egs/commonvoice/s5/local/download_and_untar.sh
index 5590b36486d..23650b1d475 100755
--- a/egs/commonvoice/s5/local/download_and_untar.sh
+++ b/egs/commonvoice/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Luminar Technologies, Inc. (author: Daniel Galvez)
diff --git a/egs/commonvoice/s5/local/nnet3/compare_wer.sh b/egs/commonvoice/s5/local/nnet3/compare_wer.sh
index 17cbe1bbcef..5e3c0b0f9bd 100755
--- a/egs/commonvoice/s5/local/nnet3/compare_wer.sh
+++ b/egs/commonvoice/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copied from egs/mini_librispeech/s5/local/nnet3/compare_wer.sh (commit 87d95c5efff7da3b6f04e719a96de4204a367f8b)
diff --git a/egs/commonvoice/s5/local/nnet3/run_ivector_common.sh b/egs/commonvoice/s5/local/nnet3/run_ivector_common.sh
index 5560120677e..ab43a277218 100755
--- a/egs/commonvoice/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/commonvoice/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Adapted from egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh (commit 92c99ee51caeba4be7c5ab39ea7c1d6100f3d67b)
diff --git a/egs/commonvoice/s5/local/prepare_dict.sh b/egs/commonvoice/s5/local/prepare_dict.sh
index cdfffe42080..670dc972d26 100755
--- a/egs/commonvoice/s5/local/prepare_dict.sh
+++ b/egs/commonvoice/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/commonvoice/s5/local/prepare_lm.sh b/egs/commonvoice/s5/local/prepare_lm.sh
index 8bd689ac73e..86d9df1a996 100755
--- a/egs/commonvoice/s5/local/prepare_lm.sh
+++ b/egs/commonvoice/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/commonvoice/s5/run.sh b/egs/commonvoice/s5/run.sh
index 3e0e46c89f1..6065857ade2 100755
--- a/egs/commonvoice/s5/run.sh
+++ b/egs/commonvoice/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Recipe for Mozilla Common Voice corpus v1
#
diff --git a/egs/csj/s5/local/chain/compare_wer.sh b/egs/csj/s5/local/chain/compare_wer.sh
index d7017a51a71..3076d394f41 100644
--- a/egs/csj/s5/local/chain/compare_wer.sh
+++ b/egs/csj/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn{1a,1b}
diff --git a/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh
index 75ceb80e3e0..b622b9eb262 100755
--- a/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a basic TDNN experiment.(As the speed_perturbation is done by default,
# the _sp suffix on the directory name is removed.)
diff --git a/egs/csj/s5/local/csj_data_prep.sh b/egs/csj/s5/local/csj_data_prep.sh
index 69e2865e316..b3fe966a4e2 100755
--- a/egs/csj/s5/local/csj_data_prep.sh
+++ b/egs/csj/s5/local/csj_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
# 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
diff --git a/egs/csj/s5/local/csj_eval_data_prep.sh b/egs/csj/s5/local/csj_eval_data_prep.sh
index c452ee9f239..18d2fa5422c 100755
--- a/egs/csj/s5/local/csj_eval_data_prep.sh
+++ b/egs/csj/s5/local/csj_eval_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
# 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
diff --git a/egs/csj/s5/local/csj_prepare_dict.sh b/egs/csj/s5/local/csj_prepare_dict.sh
index 5620b7d99af..a2ff0c2dcc7 100755
--- a/egs/csj/s5/local/csj_prepare_dict.sh
+++ b/egs/csj/s5/local/csj_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Making dictionary using CSJ data with morpheme analysis.
# from the one in Kaldi s5 recipe in that it uses lower-case --Arnab (Jan 2013)
diff --git a/egs/csj/s5/local/csj_run_rnnlm.sh b/egs/csj/s5/local/csj_run_rnnlm.sh
index b3cb79478b1..53d50acfcc1 100755
--- a/egs/csj/s5/local/csj_run_rnnlm.sh
+++ b/egs/csj/s5/local/csj_run_rnnlm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Tokyo Institute of Technology (Authors: Tomohiro Tanaka, Takafumi Moriya and Takahiro Shinozaki)
# 2016 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
diff --git a/egs/csj/s5/local/nnet/run_dnn.sh b/egs/csj/s5/local/nnet/run_dnn.sh
index 54b1da88fd3..d615b55674f 100755
--- a/egs/csj/s5/local/nnet/run_dnn.sh
+++ b/egs/csj/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh b/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh
index 297aed1f486..a405f13095b 100755
--- a/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh
+++ b/egs/csj/s5/local/nnet/run_dnn_tandem_uc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 2016 Modified by Takafumi Moriya at Tokyo Institute of Technology
# for Japanese speech recognition using CSJ.
diff --git a/egs/csj/s5/local/nnet/run_lstm.sh b/egs/csj/s5/local/nnet/run_lstm.sh
index dc0f40dec24..17af4a1ed85 100755
--- a/egs/csj/s5/local/nnet/run_lstm.sh
+++ b/egs/csj/s5/local/nnet/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 2016 Modified by Takafumi Moriya at Tokyo Institute of Technology
# for Japanese speech recognition using CSJ.
diff --git a/egs/csj/s5/local/nnet3/run_ivector_common.sh b/egs/csj/s5/local/nnet3/run_ivector_common.sh
index 9c6b02b6e59..8ea140869fd 100755
--- a/egs/csj/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/csj/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/csj/s5/local/nnet3/run_tdnn.sh b/egs/csj/s5/local/nnet3/run_tdnn.sh
index e656b825517..f14f216d51b 100755
--- a/egs/csj/s5/local/nnet3/run_tdnn.sh
+++ b/egs/csj/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is modified from swbd/s5c/local/nnet3/run_tdnn.sh
# Tomohiro Tanaka 15/05/2016
diff --git a/egs/csj/s5/local/run_mmi.sh b/egs/csj/s5/local/run_mmi.sh
index 1f844f697c3..7f44b89633a 100644
--- a/egs/csj/s5/local/run_mmi.sh
+++ b/egs/csj/s5/local/run_mmi.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. cmd.sh
diff --git a/egs/csj/s5/local/run_sgmm2.sh b/egs/csj/s5/local/run_sgmm2.sh
index c66b43c4f7f..66477a9c7b1 100755
--- a/egs/csj/s5/local/run_sgmm2.sh
+++ b/egs/csj/s5/local/run_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/csj/s5/local/score_csj.sh b/egs/csj/s5/local/score_csj.sh
index 91cc944e0b5..92e28adf8b3 100755
--- a/egs/csj/s5/local/score_csj.sh
+++ b/egs/csj/s5/local/score_csj.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# Modified by Takafumi Moriya for Japanese speech recognition using CSJ.
diff --git a/egs/csj/s5/run.sh b/egs/csj/s5/run.sh
index aaf2e51313e..dde2197666b 100755
--- a/egs/csj/s5/run.sh
+++ b/egs/csj/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Tokyo Institute of Technology
# (Authors: Takafumi Moriya, Tomohiro Tanaka and Takahiro Shinozaki)
diff --git a/egs/dihard_2018/v1/local/make_dihard_2018_dev.sh b/egs/dihard_2018/v1/local/make_dihard_2018_dev.sh
index cc48e2e792a..161bcd4d5f2 100755
--- a/egs/dihard_2018/v1/local/make_dihard_2018_dev.sh
+++ b/egs/dihard_2018/v1/local/make_dihard_2018_dev.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Zili Huang
# Apache 2.0.
#
diff --git a/egs/dihard_2018/v1/local/make_dihard_2018_eval.sh b/egs/dihard_2018/v1/local/make_dihard_2018_eval.sh
index 0a461c635ec..86bb0c31bb7 100755
--- a/egs/dihard_2018/v1/local/make_dihard_2018_eval.sh
+++ b/egs/dihard_2018/v1/local/make_dihard_2018_eval.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Zili Huang
# Apache 2.0.
#
diff --git a/egs/dihard_2018/v1/local/prepare_feats.sh b/egs/dihard_2018/v1/local/prepare_feats.sh
index 9fa70a2d91e..d9e0c2b49c9 100755
--- a/egs/dihard_2018/v1/local/prepare_feats.sh
+++ b/egs/dihard_2018/v1/local/prepare_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/dihard_2018/v1/run.sh b/egs/dihard_2018/v1/run.sh
index eb23ac500cd..be7cc9a6df6 100755
--- a/egs/dihard_2018/v1/run.sh
+++ b/egs/dihard_2018/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
# 2017-2018 David Snyder
diff --git a/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats.sh b/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats.sh
index 4ad2c42d8b9..19d202bc0dd 100755
--- a/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats.sh
+++ b/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Apache 2.0.
diff --git a/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats_for_egs.sh
index 1d8ac6153e7..cf0519aded9 100755
--- a/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats_for_egs.sh
+++ b/egs/dihard_2018/v2/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copied from egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh (commit 3ea534070fd2cccd2e4ee21772132230033022ce).
#
diff --git a/egs/dihard_2018/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/dihard_2018/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh
index 4ee472b1c71..7e39b8a2110 100755
--- a/egs/dihard_2018/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh
+++ b/egs/dihard_2018/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 David Snyder
# 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/dihard_2018/v2/run.sh b/egs/dihard_2018/v2/run.sh
index 6cd6630a838..542fc0930dd 100755
--- a/egs/dihard_2018/v2/run.sh
+++ b/egs/dihard_2018/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
# 2017-2018 David Snyder
diff --git a/egs/fame/s5/local/fame_data_prep.sh b/egs/fame/s5/local/fame_data_prep.sh
index 11c28c1d130..fb5ae71e4bf 100755
--- a/egs/fame/s5/local/fame_data_prep.sh
+++ b/egs/fame/s5/local/fame_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/s5/local/fame_dict_prep.sh b/egs/fame/s5/local/fame_dict_prep.sh
index 95b5d846e6a..2202b1adf91 100755
--- a/egs/fame/s5/local/fame_dict_prep.sh
+++ b/egs/fame/s5/local/fame_dict_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/s5/local/nnet/run_dnn.sh b/egs/fame/s5/local/nnet/run_dnn.sh
index ca1efa5e0ac..80c877dc50e 100755
--- a/egs/fame/s5/local/nnet/run_dnn.sh
+++ b/egs/fame/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/s5/local/nnet/run_dnn_fbank.sh b/egs/fame/s5/local/nnet/run_dnn_fbank.sh
index a81449ffbcf..9d068c71455 100755
--- a/egs/fame/s5/local/nnet/run_dnn_fbank.sh
+++ b/egs/fame/s5/local/nnet/run_dnn_fbank.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/s5/local/wer_hyp_filter b/egs/fame/s5/local/wer_hyp_filter
index 372d1a9c73a..f1fb43d135d 100755
--- a/egs/fame/s5/local/wer_hyp_filter
+++ b/egs/fame/s5/local/wer_hyp_filter
@@ -1,2 +1,4 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s:::g
+'
diff --git a/egs/fame/s5/local/wer_output_filter b/egs/fame/s5/local/wer_output_filter
index 372d1a9c73a..f1fb43d135d 100755
--- a/egs/fame/s5/local/wer_output_filter
+++ b/egs/fame/s5/local/wer_output_filter
@@ -1,2 +1,4 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s:::g
+'
diff --git a/egs/fame/s5/local/wer_ref_filter b/egs/fame/s5/local/wer_ref_filter
index 372d1a9c73a..f1fb43d135d 100755
--- a/egs/fame/s5/local/wer_ref_filter
+++ b/egs/fame/s5/local/wer_ref_filter
@@ -1,2 +1,4 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s:::g
+'
diff --git a/egs/fame/s5/run.sh b/egs/fame/s5/run.sh
index de6fe46b7c4..a8ed7bbd917 100755
--- a/egs/fame/s5/run.sh
+++ b/egs/fame/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/fame/v1/local/dnn/run_nnet2_common.sh b/egs/fame/v1/local/dnn/run_nnet2_common.sh
index df5804d7d78..54223c76d71 100755
--- a/egs/fame/v1/local/dnn/run_nnet2_common.sh
+++ b/egs/fame/v1/local/dnn/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Make the features.
diff --git a/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh b/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh
index bba54c5583f..c9523f0e920 100755
--- a/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh
+++ b/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Radboud University (Author: Emre Yilmaz)
#
# This script is based on run_nnet2_multisplice.sh in
diff --git a/egs/fame/v1/local/dnn/train_dnn.sh b/egs/fame/v1/local/dnn/train_dnn.sh
index 7155f32d6a4..c211feb7173 100755
--- a/egs/fame/v1/local/dnn/train_dnn.sh
+++ b/egs/fame/v1/local/dnn/train_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Radboud University (Author: Emre Yilmaz)
. ./cmd.sh
diff --git a/egs/fame/v1/local/fame_data_prep.sh b/egs/fame/v1/local/fame_data_prep.sh
index bbe30976dd5..996b32fe82d 100755
--- a/egs/fame/v1/local/fame_data_prep.sh
+++ b/egs/fame/v1/local/fame_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2017 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/v1/local/fame_dict_prep.sh b/egs/fame/v1/local/fame_dict_prep.sh
index 122c34c837b..52c1f5d32f4 100755
--- a/egs/fame/v1/local/fame_dict_prep.sh
+++ b/egs/fame/v1/local/fame_dict_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/v1/local/plda_scoring.sh b/egs/fame/v1/local/plda_scoring.sh
index 63d4a4f0d4c..d3e9c0c0e9d 100755
--- a/egs/fame/v1/local/plda_scoring.sh
+++ b/egs/fame/v1/local/plda_scoring.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
diff --git a/egs/fame/v1/local/prepare_train.sh b/egs/fame/v1/local/prepare_train.sh
index 0a3979dd6a7..c1d91b85645 100755
--- a/egs/fame/v1/local/prepare_train.sh
+++ b/egs/fame/v1/local/prepare_train.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2017 Radboud University (Author: Emre Yilmaz)
diff --git a/egs/fame/v1/local/scoring_common.sh b/egs/fame/v1/local/scoring_common.sh
index 63950ae5711..0d9f74f6600 100755
--- a/egs/fame/v1/local/scoring_common.sh
+++ b/egs/fame/v1/local/scoring_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# Copyright 2017 Emre Yilmaz (Adapted)
# Apache 2.0.
diff --git a/egs/fame/v1/run.sh b/egs/fame/v1/run.sh
index 34c425adcf7..8a1c6334e55 100755
--- a/egs/fame/v1/run.sh
+++ b/egs/fame/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2015 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/fame/v2/run.sh b/egs/fame/v2/run.sh
index 43fb5a275de..d3bb35a64f4 100755
--- a/egs/fame/v2/run.sh
+++ b/egs/fame/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 David Snyder
# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2015 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/farsdat/s5/local/farsdat_data_prep.sh b/egs/farsdat/s5/local/farsdat_data_prep.sh
index 0a086c535d2..9c114501537 100755
--- a/egs/farsdat/s5/local/farsdat_data_prep.sh
+++ b/egs/farsdat/s5/local/farsdat_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Univercity of Tehran (Author: Bagher BabaAli)
diff --git a/egs/farsdat/s5/local/farsdat_format_data.sh b/egs/farsdat/s5/local/farsdat_format_data.sh
index 8e565f11fd0..2415f163bf8 100644
--- a/egs/farsdat/s5/local/farsdat_format_data.sh
+++ b/egs/farsdat/s5/local/farsdat_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 (Author: Daniel Povey, Bagher BabaAli)
# Apache 2.0
diff --git a/egs/farsdat/s5/local/farsdat_norm_trans.sh b/egs/farsdat/s5/local/farsdat_norm_trans.sh
index 369843c95a6..7bb53f1aeed 100755
--- a/egs/farsdat/s5/local/farsdat_norm_trans.sh
+++ b/egs/farsdat/s5/local/farsdat_norm_trans.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Tehran (Author: Bagher BabaAli)
# Apache 2.0.
diff --git a/egs/farsdat/s5/local/farsdat_prepare_dict.sh b/egs/farsdat/s5/local/farsdat_prepare_dict.sh
index af03e4f44b3..6d2730b6937 100755
--- a/egs/farsdat/s5/local/farsdat_prepare_dict.sh
+++ b/egs/farsdat/s5/local/farsdat_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 (Authors: Daniel Povey, Bagher BabaAli)
diff --git a/egs/farsdat/s5/local/farsdat_prepare_lm.sh b/egs/farsdat/s5/local/farsdat_prepare_lm.sh
index c04f756d438..6476fa3b168 100755
--- a/egs/farsdat/s5/local/farsdat_prepare_lm.sh
+++ b/egs/farsdat/s5/local/farsdat_prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/farsdat/s5/local/nnet/run_dnn.sh b/egs/farsdat/s5/local/nnet/run_dnn.sh
index a02894a7322..19f49ad6bd2 100755
--- a/egs/farsdat/s5/local/nnet/run_dnn.sh
+++ b/egs/farsdat/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/farsdat/s5/local/score_sclite.sh b/egs/farsdat/s5/local/score_sclite.sh
index 6269f7c494b..90cb91ad33c 100755
--- a/egs/farsdat/s5/local/score_sclite.sh
+++ b/egs/farsdat/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/farsdat/s5/run.sh b/egs/farsdat/s5/run.sh
index 4c3d3c5882b..a08bd8af308 100755
--- a/egs/farsdat/s5/run.sh
+++ b/egs/farsdat/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 University of Tehran (Author: Bagher BabaAli)
diff --git a/egs/fisher_callhome_spanish/s5/local/callhome_data_prep.sh b/egs/fisher_callhome_spanish/s5/local/callhome_data_prep.sh
index f61b0fa9519..31202c5a406 100755
--- a/egs/fisher_callhome_spanish/s5/local/callhome_data_prep.sh
+++ b/egs/fisher_callhome_spanish/s5/local/callhome_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Gaurav Kumar. Apache 2.0
# The input is the Callhome Spanish Dataset. (*.sph files)
diff --git a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh
index 7f407552c2e..087f95c5392 100755
--- a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh
+++ b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1g is like 1f but upgrading to a "resnet-style TDNN-F model", i.e.
# with bypass resnet connections, and re-tuned.
diff --git a/egs/fisher_callhome_spanish/s5/local/fsp_create_test_lang.sh b/egs/fisher_callhome_spanish/s5/local/fsp_create_test_lang.sh
index fb765b57e69..195b9f25713 100755
--- a/egs/fisher_callhome_spanish/s5/local/fsp_create_test_lang.sh
+++ b/egs/fisher_callhome_spanish/s5/local/fsp_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Gaurav Kumar. Apache 2.0
#
diff --git a/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh b/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh
index 11d65da3e95..0bcd4abca8b 100755
--- a/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh
+++ b/egs/fisher_callhome_spanish/s5/local/fsp_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Gaurav Kumar. Apache 2.0
# The input is the Fisher Dataset which contains DISC1 and DISC2. (*.sph files)
diff --git a/egs/fisher_callhome_spanish/s5/local/fsp_train_lms.sh b/egs/fisher_callhome_spanish/s5/local/fsp_train_lms.sh
index cebf3b222ab..c839dd16255 100755
--- a/egs/fisher_callhome_spanish/s5/local/fsp_train_lms.sh
+++ b/egs/fisher_callhome_spanish/s5/local/fsp_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Gaurav Kumar. Apache 2.0
# To be run from one level above this directory
diff --git a/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh
index cc9de4d26c5..4204e94fad9 100755
--- a/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh b/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh
index 3713fe228d6..300d54b0945 100755
--- a/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh
+++ b/egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/fisher_callhome_spanish/s5/local/run_sgmm2x.sh b/egs/fisher_callhome_spanish/s5/local/run_sgmm2x.sh
index 9148b1f1171..7b9d1cc7ab3 100755
--- a/egs/fisher_callhome_spanish/s5/local/run_sgmm2x.sh
+++ b/egs/fisher_callhome_spanish/s5/local/run_sgmm2x.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Gaurav Kumar. Apache 2.0
# This is as run_sgmm2.sh but excluding the "speaker-dependent weights",
diff --git a/egs/fisher_callhome_spanish/s5/local/subset_data_prep.sh b/egs/fisher_callhome_spanish/s5/local/subset_data_prep.sh
index 9f5855d56c4..433201609cb 100755
--- a/egs/fisher_callhome_spanish/s5/local/subset_data_prep.sh
+++ b/egs/fisher_callhome_spanish/s5/local/subset_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Gaurav Kumar. Apache 2.0
# The input is a subset of the dataset in use. (*.sph files)
diff --git a/egs/fisher_callhome_spanish/s5/local/wer_output_filter b/egs/fisher_callhome_spanish/s5/local/wer_output_filter
index 4fce42945b3..2c52ee3fd23 100755
--- a/egs/fisher_callhome_spanish/s5/local/wer_output_filter
+++ b/egs/fisher_callhome_spanish/s5/local/wer_output_filter
@@ -1,5 +1,7 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s:\[laughter\]::g
s:\[noise\]::g
s:\[oov\]::g
s:::g
+'
diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh
index 6e2752a7b68..c8e6adab40f 100755
--- a/egs/fisher_callhome_spanish/s5/run.sh
+++ b/egs/fisher_callhome_spanish/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Nagendra Goel, Saikiran Valluri Apache 2.0
# Copyright 2014 Gaurav Kumar. Apache 2.0
diff --git a/egs/fisher_english/s5/local/chain/compare_wer_general.sh b/egs/fisher_english/s5/local/chain/compare_wer_general.sh
index 2f724c8ff81..4d80fb90687 100755
--- a/egs/fisher_english/s5/local/chain/compare_wer_general.sh
+++ b/egs/fisher_english/s5/local/chain/compare_wer_general.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_{c,d}_sp
diff --git a/egs/fisher_english/s5/local/chain/run_tdnn.sh b/egs/fisher_english/s5/local/chain/run_tdnn.sh
index 1fd0f1fdf3a..1df1a4555c5 100755
--- a/egs/fisher_english/s5/local/chain/run_tdnn.sh
+++ b/egs/fisher_english/s5/local/chain/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# Based on run_tdnn_7b.sh in the fisher swbd recipe
diff --git a/egs/fisher_english/s5/local/fisher_create_test_lang.sh b/egs/fisher_english/s5/local/fisher_create_test_lang.sh
index ac3e16c9c78..3202589a16a 100755
--- a/egs/fisher_english/s5/local/fisher_create_test_lang.sh
+++ b/egs/fisher_english/s5/local/fisher_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script formats ARPA LM into G.fst.
diff --git a/egs/fisher_english/s5/local/fisher_data_prep.sh b/egs/fisher_english/s5/local/fisher_data_prep.sh
index f3ad3c3f5bd..900ee385768 100755
--- a/egs/fisher_english/s5/local/fisher_data_prep.sh
+++ b/egs/fisher_english/s5/local/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/fisher_english/s5/local/fisher_prepare_dict.sh b/egs/fisher_english/s5/local/fisher_prepare_dict.sh
index f52ec61823a..bd6b6f45b67 100755
--- a/egs/fisher_english/s5/local/fisher_prepare_dict.sh
+++ b/egs/fisher_english/s5/local/fisher_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/fisher_english/s5/local/fisher_train_lms.sh b/egs/fisher_english/s5/local/fisher_train_lms.sh
index 881d3ce9466..d1c8b1e9714 100755
--- a/egs/fisher_english/s5/local/fisher_train_lms.sh
+++ b/egs/fisher_english/s5/local/fisher_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/fisher_english/s5/local/fisher_train_lms_pocolm.sh b/egs/fisher_english/s5/local/fisher_train_lms_pocolm.sh
index 906703953a1..570339104f7 100755
--- a/egs/fisher_english/s5/local/fisher_train_lms_pocolm.sh
+++ b/egs/fisher_english/s5/local/fisher_train_lms_pocolm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh
index 210d0f5646f..8e4218f34ff 100755
--- a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh
+++ b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this (local/nnet2/run_6c_gpu.sh) trains a p-norm neural network on top of
diff --git a/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh
index b203f9638b4..d8c3945a5d9 100755
--- a/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/fisher_english/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2017 Vimal Manohar
diff --git a/egs/fisher_english/s5/local/online/run_nnet2.sh b/egs/fisher_english/s5/local/online/run_nnet2.sh
index 2204a16e363..fbd0953f42c 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/fisher_english/s5/local/online/run_nnet2_b.sh b/egs/fisher_english/s5/local/online/run_nnet2_b.sh
index 512f35d8043..2664fe8a718 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2_b.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2_b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script run_nnet2_b.sh is as run_nnet2.sh but it trains a larger network,
# with 5 instead of 4 hidden layers and p-norm (input,output) dims of
diff --git a/egs/fisher_english/s5/local/online/run_nnet2_common.sh b/egs/fisher_english/s5/local/online/run_nnet2_common.sh
index 5a23e6b32da..af3f9f25695 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2_common.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Make the features, build the iVector extractor
diff --git a/egs/fisher_english/s5/local/online/run_nnet2_discriminative.sh b/egs/fisher_english/s5/local/online/run_nnet2_discriminative.sh
index 8c85a989fdd..25d618d2674 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2_discriminative.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is to be run after run_nnet2.sh
diff --git a/egs/fisher_english/s5/local/online/run_nnet2_multisplice.sh b/egs/fisher_english/s5/local/online/run_nnet2_multisplice.sh
index b5361f2f8d8..c8ad9d8d1de 100755
--- a/egs/fisher_english/s5/local/online/run_nnet2_multisplice.sh
+++ b/egs/fisher_english/s5/local/online/run_nnet2_multisplice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/fisher_english/s5/local/run_data_cleaning.sh b/egs/fisher_english/s5/local/run_data_cleaning.sh
index 68b752ad577..7154da73e27 100755
--- a/egs/fisher_english/s5/local/run_data_cleaning.sh
+++ b/egs/fisher_english/s5/local/run_data_cleaning.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script shows how you can do data-cleaning, and exclude data that has a
diff --git a/egs/fisher_english/s5/local/run_nnet2.sh b/egs/fisher_english/s5/local/run_nnet2.sh
index 359b6c5afdf..6f971129878 100755
--- a/egs/fisher_english/s5/local/run_nnet2.sh
+++ b/egs/fisher_english/s5/local/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This shows what you can potentially run; you'd probably want to pick and choose.
diff --git a/egs/fisher_english/s5/local/run_unk_model.sh b/egs/fisher_english/s5/local/run_unk_model.sh
index 1fe658bda79..1894acf7bdb 100755
--- a/egs/fisher_english/s5/local/run_unk_model.sh
+++ b/egs/fisher_english/s5/local/run_unk_model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh
index 07636a8b3c8..24c64d84ba9 100644
--- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh
+++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
index b1c133942ef..d934db2929d 100755
--- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
+++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh
index 04244014502..66ff7363a0e 100755
--- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh
+++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/local/semisup/run_100k.sh b/egs/fisher_english/s5/local/semisup/run_100k.sh
index 7657e94b7f2..0d93820d865 100644
--- a/egs/fisher_english/s5/local/semisup/run_100k.sh
+++ b/egs/fisher_english/s5/local/semisup/run_100k.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/local/semisup/run_50k.sh b/egs/fisher_english/s5/local/semisup/run_50k.sh
index c2a5c0db7e7..27fb54d7659 100644
--- a/egs/fisher_english/s5/local/semisup/run_50k.sh
+++ b/egs/fisher_english/s5/local/semisup/run_50k.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/fisher_english/s5/run.sh b/egs/fisher_english/s5/run.sh
index 67c0d5ce638..256cc7f850f 100755
--- a/egs/fisher_english/s5/run.sh
+++ b/egs/fisher_english/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# It's best to run the commands in this one by one.
diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh
index 66f87c8da8f..ba116ca472e 100755
--- a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# based on run_tdnn_6h.sh
diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh
index c12f604f26b..87a77127a21 100755
--- a/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh
index 543f753bd4e..eea6efea2b9 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh
index efcd1eced4a..8d31a580ea1 100644
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh
index e4a555abfdd..ad6139e909c 100644
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh
index 5650cedca28..52d8e2b7ae1 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh
index 5beb2e74a9a..cab547fc1e4 100644
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Nagendra Kumar Goel,
# Saikiran Valluri, Govivace.Inc - Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh
index f3cc869e6de..37ab5ba5e4c 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh
index 059a81e15fc..73344f74995 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh
index d86b699d6f6..613c4fde163 100755
--- a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh
+++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/fisher_swbd/s5/local/eval2000_data_prep.sh b/egs/fisher_swbd/s5/local/eval2000_data_prep.sh
index 533b5ecf46e..17e069964ff 100755
--- a/egs/fisher_swbd/s5/local/eval2000_data_prep.sh
+++ b/egs/fisher_swbd/s5/local/eval2000_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Hub-5 Eval 2000 data preparation
# Author: Arnab Ghoshal (Jan 2013)
diff --git a/egs/fisher_swbd/s5/local/fisher_create_test_lang.sh b/egs/fisher_swbd/s5/local/fisher_create_test_lang.sh
index f0926d2ceab..250257e071d 100755
--- a/egs/fisher_swbd/s5/local/fisher_create_test_lang.sh
+++ b/egs/fisher_swbd/s5/local/fisher_create_test_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/fisher_swbd/s5/local/fisher_create_test_lang_fsh.sh b/egs/fisher_swbd/s5/local/fisher_create_test_lang_fsh.sh
index 44291ddace9..e1c174c135f 100755
--- a/egs/fisher_swbd/s5/local/fisher_create_test_lang_fsh.sh
+++ b/egs/fisher_swbd/s5/local/fisher_create_test_lang_fsh.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f path.sh ]; then . ./path.sh; fi
diff --git a/egs/fisher_swbd/s5/local/fisher_data_prep.sh b/egs/fisher_swbd/s5/local/fisher_data_prep.sh
index 186f7d7e122..d8cd7bc1ba3 100755
--- a/egs/fisher_swbd/s5/local/fisher_data_prep.sh
+++ b/egs/fisher_swbd/s5/local/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh
index ddc70295e40..3b7148d46ce 100755
--- a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh
+++ b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh
index bae19603285..90786b78209 100755
--- a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh
+++ b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/fisher_swbd/s5/local/fisher_train_lms.sh b/egs/fisher_swbd/s5/local/fisher_train_lms.sh
index 7df4353338e..3cf89d4c48e 100755
--- a/egs/fisher_swbd/s5/local/fisher_train_lms.sh
+++ b/egs/fisher_swbd/s5/local/fisher_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/fisher_swbd/s5/local/fisher_train_lms_fsh.sh b/egs/fisher_swbd/s5/local/fisher_train_lms_fsh.sh
index c9df2b72f0d..46418100f07 100755
--- a/egs/fisher_swbd/s5/local/fisher_train_lms_fsh.sh
+++ b/egs/fisher_swbd/s5/local/fisher_train_lms_fsh.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
@@ -31,7 +31,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz ||
- wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+ wget -c http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
diff --git a/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
index 01c988709f1..b711d2e9304 100755
--- a/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
set -e
diff --git a/egs/fisher_swbd/s5/local/online/run_nnet2_common.sh b/egs/fisher_swbd/s5/local/online/run_nnet2_common.sh
index ad2a54bd8bf..e7cff4dd606 100755
--- a/egs/fisher_swbd/s5/local/online/run_nnet2_common.sh
+++ b/egs/fisher_swbd/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Make the features, build the iVector extractor
diff --git a/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh b/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh
index 91dc19a1e71..ad1417daf19 100755
--- a/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh
+++ b/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/fisher_swbd/s5/local/rt03_data_prep.sh b/egs/fisher_swbd/s5/local/rt03_data_prep.sh
index f537b88b609..15e03f70498 100755
--- a/egs/fisher_swbd/s5/local/rt03_data_prep.sh
+++ b/egs/fisher_swbd/s5/local/rt03_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# RT-03 data preparation (conversational telephone speech part only)
# Adapted from Arnab Ghoshal's script for Hub-5 Eval 2000 by Peng Qi
diff --git a/egs/fisher_swbd/s5/local/score.sh b/egs/fisher_swbd/s5/local/score.sh
index 189d49119ab..ba8f43d7efd 100755
--- a/egs/fisher_swbd/s5/local/score.sh
+++ b/egs/fisher_swbd/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
orig_args=
diff --git a/egs/fisher_swbd/s5/local/score_basic.sh b/egs/fisher_swbd/s5/local/score_basic.sh
index 6d3ac65c383..18a939d0455 100755
--- a/egs/fisher_swbd/s5/local/score_basic.sh
+++ b/egs/fisher_swbd/s5/local/score_basic.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/fisher_swbd/s5/local/score_sclite.sh b/egs/fisher_swbd/s5/local/score_sclite.sh
index c17b3c69c9b..18ad3853255 100755
--- a/egs/fisher_swbd/s5/local/score_sclite.sh
+++ b/egs/fisher_swbd/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/fisher_swbd/s5/local/score_sclite_conf.sh b/egs/fisher_swbd/s5/local/score_sclite_conf.sh
index bb354fd50cf..d6d052ee763 100755
--- a/egs/fisher_swbd/s5/local/score_sclite_conf.sh
+++ b/egs/fisher_swbd/s5/local/score_sclite_conf.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/fisher_swbd/s5/local/swbd1_data_download.sh b/egs/fisher_swbd/s5/local/swbd1_data_download.sh
index 04904945472..7f5ea2e13cb 100755
--- a/egs/fisher_swbd/s5/local/swbd1_data_download.sh
+++ b/egs/fisher_swbd/s5/local/swbd1_data_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Switchboard-1 training data preparation customized for Edinburgh
# Author: Arnab Ghoshal (Jan 2013)
@@ -36,7 +36,7 @@ if [ ! -d $SWBD_DIR/transcriptions/swb_ms98_transcriptions ]; then
if [ ! -d swb_ms98_transcriptions ]; then
echo " *** Downloading trascriptions and dictionary ***"
wget http://www.openslr.org/resources/5/switchboard_word_alignments.tar.gz ||
- wget http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz
+ wget -c http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz
tar -xf switchboard_word_alignments.tar.gz
fi
)
diff --git a/egs/fisher_swbd/s5/local/swbd1_data_prep.sh b/egs/fisher_swbd/s5/local/swbd1_data_prep.sh
index ce5d580eb5a..ee5bfc204aa 100755
--- a/egs/fisher_swbd/s5/local/swbd1_data_prep.sh
+++ b/egs/fisher_swbd/s5/local/swbd1_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Switchboard-1 training data preparation customized for Edinburgh
# Author: Arnab Ghoshal (Jan 2013)
diff --git a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh
index fc951d14fa0..da978a21c87 100755
--- a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh
+++ b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Formatting the Mississippi State dictionary for use in Edinburgh. Differs
# from the one in Kaldi s5 recipe in that it uses lower-case --Arnab (Jan 2013)
diff --git a/egs/fisher_swbd/s5/run.sh b/egs/fisher_swbd/s5/run.sh
index fcb0ac8ebb6..09da3ab7c91 100755
--- a/egs/fisher_swbd/s5/run.sh
+++ b/egs/fisher_swbd/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# It's best to run the commands in this one by one.
. ./cmd.sh
diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh
index 66c5ad3335f..f9ceb667553 100755
--- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7h.sh in swbd chain recipe.
diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh
index 1981bb0530d..d27f2884e10 100755
--- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script shows improvement arising from data cleaning.
diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh
index 6fa10344cfc..3e1f7ac7b15 100755
--- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh
+++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# CER:
# %WER 16.44 [ 35459 / 215718, 4216 ins, 11278 del, 19965 sub ] exp/chain/tdnn_1b_sp/decode_test/cer_10_0.0
diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh
index 1f4b7e12850..2f83db14ad1 100755
--- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# CER:
# 1a: %WER 16.83 [ 36305 / 215718, 4772 ins, 10810 del, 20723 sub ] exp/chain/tdnn_1a_sp/decode_test/cer_9_0.0
diff --git a/egs/formosa/s5/local/nnet3/run_ivector_common.sh b/egs/formosa/s5/local/nnet3/run_ivector_common.sh
index 723589ddd2e..11542e55760 100755
--- a/egs/formosa/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/formosa/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/formosa/s5/local/nnet3/run_tdnn.sh b/egs/formosa/s5/local/nnet3/run_tdnn.sh
index a41d990a9b2..9a0d6a6dbc1 100755
--- a/egs/formosa/s5/local/nnet3/run_tdnn.sh
+++ b/egs/formosa/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on swbd/s5c/local/nnet3/run_tdnn.sh
diff --git a/egs/formosa/s5/local/prepare_data.sh b/egs/formosa/s5/local/prepare_data.sh
index 68f342e1549..1da4eb16776 100755
--- a/egs/formosa/s5/local/prepare_data.sh
+++ b/egs/formosa/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2018 Yuan-Fu Liao, National Taipei University of Technology
@@ -26,15 +26,15 @@ if [ -z "$(command -v dos2unix 2>/dev/null)" ]; then
exit 1;
fi
-# have to remvoe previous files to avoid filtering speakers according to cmvn.scp and feats.scp
+# have to remove previous files to avoid filtering speakers according to cmvn.scp and feats.scp
rm -rf data/all data/train data/test data/eval data/local/train
mkdir -p data/all data/train data/test data/eval data/local/train
# make utt2spk, wav.scp and text
-find $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/all/utt2spk
-find $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/all/wav.scp
-find $train_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/all/text
+find -L $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/all/utt2spk
+find -L $train_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/all/wav.scp
+find -L $train_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/all/text
# fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp,
# duplicate entries and so on). Also, it regenerates the spk2utt from
@@ -51,9 +51,9 @@ echo "cp data/train/text data/local/train/text for language model training"
cat data/train/text | awk '{$1=""}1;' | awk '{$1=$1}1;' > data/local/train/text
# preparing EVAL set.
-find $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | dos2unix > data/eval/utt2spk
-find $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | dos2unix > data/eval/wav.scp
-find $eval_key_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | dos2unix > data/eval/text
+find -L $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $y' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/eval/utt2spk
+find -L $eval_dir -name *.wav -exec sh -c 'x={}; y=$(basename -s .wav $x); printf "%s %s\n" $y $x' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/eval/wav.scp
+find -L $eval_key_dir -name *.txt -exec sh -c 'x={}; y=$(basename -s .txt $x); printf "%s " $y; cat $x' \; | sed 's/\xe3\x80\x80\|\xc2\xa0//g' | dos2unix > data/eval/text
utils/fix_data_dir.sh data/eval
echo "Data preparation completed."
diff --git a/egs/formosa/s5/local/prepare_dict.sh b/egs/formosa/s5/local/prepare_dict.sh
index 4e580f5f6e8..82f93895919 100755
--- a/egs/formosa/s5/local/prepare_dict.sh
+++ b/egs/formosa/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2018 Yuan-Fu Liao, National Taipei University of Technology
diff --git a/egs/formosa/s5/local/prepare_lm.sh b/egs/formosa/s5/local/prepare_lm.sh
index 59fe1529658..b137bb2886f 100755
--- a/egs/formosa/s5/local/prepare_lm.sh
+++ b/egs/formosa/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/formosa/s5/local/run_cleanup_segmentation.sh b/egs/formosa/s5/local/run_cleanup_segmentation.sh
index b72cd89b4d1..1515411969c 100755
--- a/egs/formosa/s5/local/run_cleanup_segmentation.sh
+++ b/egs/formosa/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/formosa/s5/local/score.sh b/egs/formosa/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/formosa/s5/local/score.sh
+++ b/egs/formosa/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/formosa/s5/local/train_lms.sh b/egs/formosa/s5/local/train_lms.sh
index efc5b92c573..8c91b351867 100755
--- a/egs/formosa/s5/local/train_lms.sh
+++ b/egs/formosa/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/formosa/s5/run.sh b/egs/formosa/s5/run.sh
index a4d0f2dcd1d..7cf4dbac00e 100755
--- a/egs/formosa/s5/run.sh
+++ b/egs/formosa/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018, Yuan-Fu Liao, National Taipei University of Technology, yfliao@mail.ntut.edu.tw
#
diff --git a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
index a95b37ab14f..960a3ced161 100755
--- a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
+++ b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/gale_format_data.sh b/egs/gale_arabic/s5/local/gale_format_data.sh
index 053323dc194..8e86f75b085 100755
--- a/egs/gale_arabic/s5/local/gale_format_data.sh
+++ b/egs/gale_arabic/s5/local/gale_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/gale_prep_dict.sh b/egs/gale_arabic/s5/local/gale_prep_dict.sh
index f6fd83378d0..5aa3894272d 100755
--- a/egs/gale_arabic/s5/local/gale_prep_dict.sh
+++ b/egs/gale_arabic/s5/local/gale_prep_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/gale_train_lms.sh b/egs/gale_arabic/s5/local/gale_train_lms.sh
index 8f8e715390f..9f91749a0dd 100755
--- a/egs/gale_arabic/s5/local/gale_train_lms.sh
+++ b/egs/gale_arabic/s5/local/gale_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/gale_arabic/s5/local/nnet/run_lstm.sh b/egs/gale_arabic/s5/local/nnet/run_lstm.sh
index aeb2272976b..7969538b3c5 100755
--- a/egs/gale_arabic/s5/local/nnet/run_lstm.sh
+++ b/egs/gale_arabic/s5/local/nnet/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/online/run_nnet2.sh b/egs/gale_arabic/s5/local/online/run_nnet2.sh
index afc3166c9eb..4f24df40fd4 100644
--- a/egs/gale_arabic/s5/local/online/run_nnet2.sh
+++ b/egs/gale_arabic/s5/local/online/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vimal Manohar
# This is our online neural net build for Gale system
diff --git a/egs/gale_arabic/s5/local/run_sgmm.sh b/egs/gale_arabic/s5/local/run_sgmm.sh
index a5d32d18038..b0112004aa2 100755
--- a/egs/gale_arabic/s5/local/run_sgmm.sh
+++ b/egs/gale_arabic/s5/local/run_sgmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh
diff --git a/egs/gale_arabic/s5/local/score.sh b/egs/gale_arabic/s5/local/score.sh
index abd8149a672..332f038c575 100755
--- a/egs/gale_arabic/s5/local/score.sh
+++ b/egs/gale_arabic/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/gale_arabic/s5/local/score_combine.sh b/egs/gale_arabic/s5/local/score_combine.sh
index 65caab06ecc..c4d3c13886a 100755
--- a/egs/gale_arabic/s5/local/score_combine.sh
+++ b/egs/gale_arabic/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal
diff --git a/egs/gale_arabic/s5/local/score_mbr.sh b/egs/gale_arabic/s5/local/score_mbr.sh
index 04b84ccce5a..8c752368906 100755
--- a/egs/gale_arabic/s5/local/score_mbr.sh
+++ b/egs/gale_arabic/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/gale_arabic/s5/local/split_wer.sh b/egs/gale_arabic/s5/local/split_wer.sh
index 26d8a3c6023..dbcc9f03e73 100755
--- a/egs/gale_arabic/s5/local/split_wer.sh
+++ b/egs/gale_arabic/s5/local/split_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_arabic/s5/local/split_wer_per_corpus.sh b/egs/gale_arabic/s5/local/split_wer_per_corpus.sh
index 53716f809ac..556eb320a0d 100755
--- a/egs/gale_arabic/s5/local/split_wer_per_corpus.sh
+++ b/egs/gale_arabic/s5/local/split_wer_per_corpus.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_arabic/s5/run.sh b/egs/gale_arabic/s5/run.sh
index 5f20c14c414..7e672d67eb2 100755
--- a/egs/gale_arabic/s5/run.sh
+++ b/egs/gale_arabic/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/egs/gale_arabic/s5b/local/chain/compare_wer.sh b/egs/gale_arabic/s5b/local/chain/compare_wer.sh
index 1a40523355a..ece324c279e 100755
--- a/egs/gale_arabic/s5b/local/chain/compare_wer.sh
+++ b/egs/gale_arabic/s5b/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/gale_arabic/s5b/local/chain/run_chain_common.sh b/egs/gale_arabic/s5b/local/chain/run_chain_common.sh
index da37e148441..710625cf489 100755
--- a/egs/gale_arabic/s5b/local/chain/run_chain_common.sh
+++ b/egs/gale_arabic/s5b/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh
index bf2e45c9914..346c3f39ccb 100755
--- a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# ./local/chain/compare_wer.sh exp/chain/tdnn_1a_sp
# System tdnn_1a_sp
diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
index deebafc95e4..259e660532d 100755
--- a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
index a03cc5b2fa3..3732e2e4518 100755
--- a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
+++ b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh b/egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh
index 7f7b8b3ba56..6e481f2ea7d 100755
--- a/egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh
+++ b/egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh
index 6619df668ef..c624d4e8535 100755
--- a/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5b/local/prepare_lm.sh b/egs/gale_arabic/s5b/local/prepare_lm.sh
index 6fdf35f471a..70ad8bc1b76 100755
--- a/egs/gale_arabic/s5b/local/prepare_lm.sh
+++ b/egs/gale_arabic/s5b/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/gale_arabic/s5b/local/score.sh b/egs/gale_arabic/s5b/local/score.sh
index 1d84815fc69..6168f38a929 100755
--- a/egs/gale_arabic/s5b/local/score.sh
+++ b/egs/gale_arabic/s5b/local/score.sh
@@ -1,5 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/gale_arabic/s5b/local/split_wer.sh b/egs/gale_arabic/s5b/local/split_wer.sh
index d83a0f79e8c..c4c323003a3 100755
--- a/egs/gale_arabic/s5b/local/split_wer.sh
+++ b/egs/gale_arabic/s5b/local/split_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_arabic/s5c/local/chain/compare_wer.sh b/egs/gale_arabic/s5c/local/chain/compare_wer.sh
index 1a40523355a..ece324c279e 100755
--- a/egs/gale_arabic/s5c/local/chain/compare_wer.sh
+++ b/egs/gale_arabic/s5c/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/gale_arabic/s5c/local/chain/run_chain_common.sh b/egs/gale_arabic/s5c/local/chain/run_chain_common.sh
index da37e148441..710625cf489 100755
--- a/egs/gale_arabic/s5c/local/chain/run_chain_common.sh
+++ b/egs/gale_arabic/s5c/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
diff --git a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh
index bf2e45c9914..346c3f39ccb 100755
--- a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# ./local/chain/compare_wer.sh exp/chain/tdnn_1a_sp
# System tdnn_1a_sp
diff --git a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh
index deebafc95e4..259e660532d 100755
--- a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5c/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5c/local/nnet3/run_ivector_common.sh
index a03cc5b2fa3..3732e2e4518 100755
--- a/egs/gale_arabic/s5c/local/nnet3/run_ivector_common.sh
+++ b/egs/gale_arabic/s5c/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/gale_arabic/s5c/local/nnet3/tuning/run_lstm_1a.sh b/egs/gale_arabic/s5c/local/nnet3/tuning/run_lstm_1a.sh
index 7f7b8b3ba56..6e481f2ea7d 100755
--- a/egs/gale_arabic/s5c/local/nnet3/tuning/run_lstm_1a.sh
+++ b/egs/gale_arabic/s5c/local/nnet3/tuning/run_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5c/local/nnet3/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5c/local/nnet3/tuning/run_tdnn_1a.sh
index 6619df668ef..c624d4e8535 100755
--- a/egs/gale_arabic/s5c/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/gale_arabic/s5c/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# started from tedlium recipe with few edits
diff --git a/egs/gale_arabic/s5c/local/prepare_lm.sh b/egs/gale_arabic/s5c/local/prepare_lm.sh
index 6fdf35f471a..70ad8bc1b76 100755
--- a/egs/gale_arabic/s5c/local/prepare_lm.sh
+++ b/egs/gale_arabic/s5c/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/gale_arabic/s5c/local/prepare_lm_subword.sh b/egs/gale_arabic/s5c/local/prepare_lm_subword.sh
index a5d5c1d1c94..7ba19c95277 100755
--- a/egs/gale_arabic/s5c/local/prepare_lm_subword.sh
+++ b/egs/gale_arabic/s5c/local/prepare_lm_subword.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Vassil Panayotov
# 2017 Ewald Enzinger
diff --git a/egs/gale_arabic/s5c/local/score.sh b/egs/gale_arabic/s5c/local/score.sh
index 1d84815fc69..6168f38a929 100755
--- a/egs/gale_arabic/s5c/local/score.sh
+++ b/egs/gale_arabic/s5c/local/score.sh
@@ -1,5 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/gale_arabic/s5c/local/split_wer.sh b/egs/gale_arabic/s5c/local/split_wer.sh
index d83a0f79e8c..c4c323003a3 100755
--- a/egs/gale_arabic/s5c/local/split_wer.sh
+++ b/egs/gale_arabic/s5c/local/split_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_arabic/s5c/local/wer_output_filter b/egs/gale_arabic/s5c/local/wer_output_filter
index fcd40539e7f..0c8b7984f58 100755
--- a/egs/gale_arabic/s5c/local/wer_output_filter
+++ b/egs/gale_arabic/s5c/local/wer_output_filter
@@ -1,4 +1,6 @@
-#!/bin/sed -f
+#!/usr/bin/env bash
+sed '
s/@@ //g
s///g
s///g
+'
diff --git a/egs/gale_arabic/s5d/RESULTS b/egs/gale_arabic/s5d/RESULTS
new file mode 100644
index 00000000000..464b98ae5eb
--- /dev/null
+++ b/egs/gale_arabic/s5d/RESULTS
@@ -0,0 +1,19 @@
+tri1
+%WER 40.91 [ 32272 / 78894, 2147 ins, 7478 del, 22647 sub ] exp/tri1/decode/wer_12_0.5
+tri2b
+%WER 36.68 [ 28936 / 78894, 2752 ins, 5682 del, 20502 sub ] exp/tri2b/decode/wer_13_0.0
+tri3b
+%WER 35.35 [ 27892 / 78894, 2587 ins, 7024 del, 18281 sub ] exp/tri3b/decode/wer_14_0.0
+
+chain for dev set
+%WER 16.60 [ 13094 / 78894, 1314 ins, 2992 del, 8788 sub ] exp/chain/tdnn_1a_sp/decode_dev/wer_9_0.0
+rnnlm-rescoring for dev set
+%WER 15.02 [ 11846 / 78894, 1248 ins, 2836 del, 7762 sub ] exp/chain/tdnn_1a_sp/decode_dev_rnnlm_1e_0.45/wer_9_0.0
+
+chain for test_p2 set
+%WER 14.95 [ 10416 / 69668, 1129 ins, 2593 del, 6694 sub ] exp/chain/tdnn_1a_sp/decode_test_p2/wer_9_0.0
+rnnlm-rescoring for test_p2 set
+%WER 13.51 [ 9413 / 69668, 1059 ins, 2517 del, 5837 sub ] exp/chain/tdnn_1a_sp/decode_test_p2_rnnlm_1e_0.45/wer_9_0.0
+
+rnnlm-rescoring for mt_eval set
+%WER 12.02 [ 10829 / 90112, 1483 ins, 2401 del, 6945 sub ] exp/chain/tdnn_1a_sp/decode_mt_all_rnnlm_1e_0.45/wer_9_0.0
diff --git a/egs/gale_arabic/s5d/cmd.sh b/egs/gale_arabic/s5d/cmd.sh
new file mode 100755
index 00000000000..0a2eda442d6
--- /dev/null
+++ b/egs/gale_arabic/s5d/cmd.sh
@@ -0,0 +1,15 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="retry.pl queue.pl"
+export decode_cmd="retry.pl queue.pl"
+export mkgraph_cmd="retry.pl queue.pl --mem 8G"
diff --git a/egs/gale_arabic/s5d/conf/decode.config b/egs/gale_arabic/s5d/conf/decode.config
new file mode 100644
index 00000000000..6f503eab35e
--- /dev/null
+++ b/egs/gale_arabic/s5d/conf/decode.config
@@ -0,0 +1 @@
+link decode_dnn.config
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/conf/mfcc.conf b/egs/gale_arabic/s5d/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/gale_arabic/s5d/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/gale_arabic/s5d/conf/mfcc_hires.conf b/egs/gale_arabic/s5d/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..c45f2b691a9
--- /dev/null
+++ b/egs/gale_arabic/s5d/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--sample-frequency=16000
+--num-mel-bins=40
+--num-ceps=40
+--low-freq=40 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/gale_arabic/s5d/conf/online_cmvn.conf b/egs/gale_arabic/s5d/conf/online_cmvn.conf
new file mode 100644
index 00000000000..cbdaf5f281c
--- /dev/null
+++ b/egs/gale_arabic/s5d/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh
diff --git a/egs/gale_arabic/s5d/local/add_to_datadir.py b/egs/gale_arabic/s5d/local/add_to_datadir.py
new file mode 100755
index 00000000000..b41ab42f7c4
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/add_to_datadir.py
@@ -0,0 +1,54 @@
+#!/usr/bin/python
+
+# This script appends utterances dumped out from XML to a Kaldi datadir
+
+import sys, re
+from xml.sax.saxutils import unescape
+
+basename=sys.argv[1]
+outdir = sys.argv[2]
+
+if len(sys.argv) > 3:
+ mer_thresh=float(sys.argv[3])
+else:
+ mer_thresh = None
+
+# open the output files in append mode
+#segments_file = open(outdir + '/segments', 'a')
+#utt2spk_file = open(outdir + '/utt2spk', 'a')
+#text_file = open(outdir + '/text', 'a')
+mgb2_file = open(outdir + '/mgb2', 'a')
+
+for line in sys.stdin:
+
+ m = re.match(r'\w+speaker(\d+)\w+\s+(.*)', line)
+ #print line
+
+ if m:
+
+ spk = int(m.group(1))
+
+ t = m.group(2).split()
+ start = float(t[0])
+ end = float(t[1])
+ mer = float(t[2])
+
+ s = [unescape(w) for w in t[3:]]
+ words = ' '.join(s)
+
+ segId = '%s_spk-%04d_seg-%07d:%07d' % (basename, spk, start*100, end*100)
+ spkId = '%s_spk-%04d' % (basename, spk)
+
+ # only add segments where the Matching Error Rate is below the prescribed threshold
+ if mer_thresh == None or mer <= mer_thresh:
+#print >> segments_file, '%s %s %.2f %.2f' % (segId, basename, start, end )
+#print >> text_file, '%s %s' % (segId, words)
+#print >> utt2spk_file, '%s %s' % (segId, spkId)
+ print >> mgb2_file, '%s %s %.3f %.3f %s' % (basename, segId, start, end, words)
+
+#segments_file.close()
+#utt2spk_file.close()
+#text_file.close()
+mgb2_file.close()
+
+
diff --git a/egs/gale_arabic/s5d/local/arabic_convert.py b/egs/gale_arabic/s5d/local/arabic_convert.py
new file mode 100755
index 00000000000..83d271bab40
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/arabic_convert.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+
+import sys
+
+def hex_to_decimal(utf8_string):
+ assert(len(utf8_string) == 3)
+ hex_dict = {}
+ char_list = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"]
+ value_list = [0, 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+ for key, value in zip (char_list, value_list):
+ hex_dict[key] = value
+
+ result = 0
+ length = len(utf8_string)
+ for i in range(length):
+ digit = utf8_string[length - 1 - i]
+ result += hex_dict[digit] * (16 ** i)
+
+ return result
+
+def get_unicode_dict():
+ unicode_dict = {}
+ utf8_list = [("621", "'"), ("622", "|"),("623", ">"),
+ ("624", "&"), ("625", "<"),("626", "}"),
+ ("627", "A"), ("628", "b"),("629", "p"),
+ ("62A", "t"), ("62B", "v"),("62C", "j"),
+ ("62D", "H"), ("62E", "x"),("62F", "d"),
+ ("630", "*"), ("631", "r"),("632", "z"),
+ ("633", "s"), ("634", "$"),("635", "S"),
+ ("636", "D"), ("637", "T"),("638", "Z"),
+ ("639", "E"), ("63A", "g"),("640", "_"),
+ ("641", "f"), ("642", "q"),("643", "k"),
+ ("644", "l"), ("645", "m"),("646", "n"),
+ ("647", "h"), ("648", "w"),("649", "Y"),
+ ("64A", "y"), ("64B", "F"),("64C", "N"),
+ ("64D", "K"), ("64E", "a"),("64F", "u"),
+ ("650", "i"), ("651", "~"),("652", "o"),
+ ("670", "`"), ("671", "{"),("67E", "P"),
+ ("686", "J"), ("6A4", "V"),("6AF", "G")]
+
+ for word_pair in utf8_list:
+ utf8 = word_pair[0]
+ char = word_pair[1]
+ unicode_dict[hex_to_decimal(utf8)] = char
+
+ return unicode_dict
+
+
+def convert(word, unicode_dict):
+ word_list = []
+ for char in word:
+ c_unicode = ord(char)
+ if c_unicode in unicode_dict:
+ word_list.append(unicode_dict[c_unicode])
+
+ return "".join(word_list)
+
+def process_arabic_text(arabic_text, unicode_dict):
+ with open(arabic_text, 'r') as file:
+ sentence_list = []
+ is_sentence = False
+ for line in file.readlines():
+#print(line.split()[0], is_sentence, line.split()[0] == "
")
+ if len(line.split()) > 0:
+ if line.split()[0] == "":
+ is_sentence = True
+
+ elif (is_sentence and line.split()[0] != "
"):
+ for word in line.split():
+ if word == '.':
+ # when meet period ".", sentence_list should not be empty (do find sentence ending with two period)
+ if (len(sentence_list) > 0):
+ sentence = " ".join(sentence_list)
+ print(sentence)
+ sentence_list = []
+ elif word[-1] == ".":
+ word = word[:-1]
+ sentence_list.append(word)
+ sentence = " ".join(sentence_list)
+ print(sentence)
+ sentence_list = []
+ else:
+ word = word
+ if word != '':
+ sentence_list.append(word)
+
+ if line.split()[0] == "
":
+ is_sentence = False
+ if (len(sentence_list) > 0):
+ print(" ".join(sentence_list))
+ sentence_list = []
+
+
+
+def main():
+ arabic_text = sys.argv[1]
+ unicode_dict = get_unicode_dict()
+ process_arabic_text(arabic_text, unicode_dict)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/gale_arabic/s5d/local/bad_segments b/egs/gale_arabic/s5d/local/bad_segments
new file mode 100644
index 00000000000..c3413f0714c
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/bad_segments
@@ -0,0 +1,10 @@
+ARABIYA_FROMIRAQ_ARB_20070302_175801_2326286_2327450
+ARABIYA_BILARABI_ARB_20061005_201400_221375_223694
+LBC_NAHAR_ARB_20060911_142800_3683267_3685290
+LBC_NAHAR_ARB_20070303_145800_3249800_3251128
+LBC_NAHAR_ARB_20070303_145800_3623646_3624152
+LBC_NAHAR_ARB_20070305_035800_481003_484069
+ALAM_WITHEVENT_ARB_20070227_205800_3141876_3144152
+ALAM_NEWSRPT_ARB_20070130_015801_2875054_2876396
+ALJZ_TODHARV_ARB_20060914_155800_2947717_2949041
+ALJZ_TODHARV_ARB_20070107_145800_2417848_2419238
diff --git a/egs/gale_arabic/s5d/local/chain/compare_wer.sh b/egs/gale_arabic/s5d/local/chain/compare_wer.sh
new file mode 100755
index 00000000000..ece324c279e
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/compare_wer.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
+
+# ./local/chain/compare_wer.sh exp/chain/cnn1a
+# System cnn1a
+# WER 0.61
+# CER 0.15
+# Final train prob -0.0377
+# Final valid prob -0.0380
+# Final train prob (xent) -0.0830
+# Final valid prob (xent) -0.0838
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: <dir1> [<dir2> ... ]"
+ echo "e.g.: $0 exp/chain/cnn{1a,1b}"
+ exit 1
+fi
+
+echo "# $0 $*"
+used_epochs=false
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+echo -n "# WER "
+for x in $*; do
+ wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}')
+ printf "% 10s" $wer
+done
+echo
+
+echo -n "# CER "
+for x in $*; do
+ cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}')
+ printf "% 10s" $cer
+done
+echo
+
+if $used_epochs; then
+ exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (xent) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (xent) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
diff --git a/egs/gale_arabic/s5d/local/chain/run_chain_common.sh b/egs/gale_arabic/s5d/local/chain/run_chain_common.sh
new file mode 100755
index 00000000000..710625cf489
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/run_chain_common.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+
+# this script has common stages shared across librispeech chain recipes.
+# It generates a new topology in a new lang directory, gets the alignments as
+# lattices, and builds a tree for the new topology
+set -e
+
+stage=11
+
+# input directory names. These options are actually compulsory, and they have
+# been named for convenience
+gmm_dir=
+ali_dir=
+lores_train_data_dir=
+
+num_leaves=6000
+
+# output directory names. They are also compulsory.
+lang=
+lat_dir=
+tree_dir=
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+[ -z $lang ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1;
+[ -z $lat_dir ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1;
+[ -z $tree_dir ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1;
+
+for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do
+ [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
+done
+
+if [ $stage -le 11 ]; then
+ echo "$0: creating lang directory with one state per phone."
+ # Create a version of the lang/ directory that has one state per phone in the
+ # topo file. [note, it really has two states.. the first one is only repeated
+ # once, the second one has zero or more repeats.]
+ if [ -d $lang ]; then
+ if [ $lang/L.fst -nt data/lang/L.fst ]; then
+ echo "$0: $lang already exists, not overwriting it; continuing"
+ else
+ echo "$0: $lang already exists and seems to be older than data/lang..."
+ echo " ... not sure what to do. Exiting."
+ exit 1;
+ fi
+ else
+ cp -r data/lang $lang
+ silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
+ nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
+ # Use our special topology... note that later on may have to tune this
+ # topology.
+ steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
+ fi
+fi
+
+if [ $stage -le 12 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ nj=$(cat ${ali_dir}/num_jobs) || exit 1;
+ steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \
+ $lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 13 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" $num_leaves ${lores_train_data_dir} $lang $ali_dir $tree_dir
+fi
+
+exit 0;
diff --git a/egs/gale_arabic/s5d/local/chain/run_tdnn.sh b/egs/gale_arabic/s5d/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/local/chain/run_tdnn_lstm.sh b/egs/gale_arabic/s5d/local/chain/run_tdnn_lstm.sh
new file mode 120000
index 00000000000..8e647598556
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/run_tdnn_lstm.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_lstm_1a.sh
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..16e9b928714
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,211 @@
+#!/usr/bin/env bash
+
+set -e -o pipefail
+stage=0
+nj=30
+train_set=train
+test_set=dev
+gmm=tri3b # this is the source gmm-dir that we'll use for alignments; it
+ # should have alignments for the specified training data.
+num_threads_ubm=32
+nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium.
+
+# Options which are not passed through to run_ivector_common.sh
+affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration.
+common_egs_dir=
+reporting_email=
+
+# LSTM/chain options
+train_stage=-10
+xent_regularize=0.1
+dropout_schedule='0,0@0.20,0.5@0.50,0'
+
+# training chunk-options
+chunk_width=150,110,100
+get_egs_stage=-10
+
+# training options
+srand=0
+remove_egs=true
+run_ivector_common=true
+run_chain_common=true
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ linear-component name=prefinal-l dim=256 $linear_opts
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 16 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/chain/train.py --stage $train_stage \
+ --cmd "$decode_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient 0.1 \
+ --chain.l2-regularize 0.0 \
+ --chain.apply-deriv-weights false \
+ --chain.lm-opts="--num-extra-lm-states=2000" \
+ --trainer.dropout-schedule $dropout_schedule \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs 6 \
+ --trainer.frames-per-iter 1500000 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 16 \
+ --trainer.optimization.initial-effective-lrate 0.00025 \
+ --trainer.optimization.final-effective-lrate 0.000025 \
+ --trainer.num-chunk-per-minibatch=64,32 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --egs.chunk-width=$chunk_width \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts "--frames-overlap-per-eg 0 --constrained false" \
+ --egs.stage $get_egs_stage \
+ --reporting.email="$reporting_email" \
+ --cleanup.remove-egs=$remove_egs \
+ --feat-dir=$train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$lat_dir \
+ --dir $dir || exit 1;
+
+fi
+
+if [ $stage -le 17 ]; then
+ # The reason we are using data/lang here, instead of $lang, is just to
+ # emphasize that it's not actually important to give mkgraph.sh the
+ # lang directory with the matched topology (since it gets the
+ # topology file from the model). So you could give it a different
+ # lang directory, one that contained a wordlist and LM of your choice,
+ # as long as phones.txt was compatible.
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang_test/phones.txt $lang/phones.txt
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 data/lang_test_srilm \
+ $tree_dir $tree_dir/graph || exit 1;
+fi
+
+if [ $stage -le 18 ]; then
+ frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+ rm $dir/.error 2>/dev/null || true
+ test_set=mt_all
+
+ steps/nnet3/decode.sh \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --extra-left-context 0 --extra-right-context 0 \
+ --extra-left-context-initial 0 \
+ --extra-right-context-final 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" --num-threads 4 \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test_set}_hires \
+ $tree_dir/graph data/${test_set}_hires ${dir}/decode_${test_set} || exit 1
+fi
diff --git a/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_lstm_1a.sh
new file mode 100755
index 00000000000..4273e83835a
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -0,0 +1,221 @@
+#!/usr/bin/env bash
+
+#started from tedlium recipe with few edits
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+chunk_left_context=40
+chunk_right_context=0
+label_delay=5
+xent_regularize=0.025
+train_set=train
+gmm=tri3b # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+# decode options
+extra_left_context=40
+extra_right_context=0
+frames_per_chunk=150
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=-10
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_lstm_affix=1a #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir= # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat <data/lang_chain/topo
+ fi
+fi
+
+if [ $stage -le 15 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 16 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" 7000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+ mkdir -p $dir
+ echo "$0: creating neural net configs using the xconfig parser";
+
+ num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+ learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-renorm-layer name=tdnn1 dim=1024
+ relu-renorm-layer name=tdnn2 dim=1024 input=Append(-1,0,1)
+ fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+ relu-renorm-layer name=tdnn3 dim=1024 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn4 dim=1024 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+ relu-renorm-layer name=tdnn5 dim=1024 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn6 dim=1024 input=Append(-3,0,3)
+ fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+
+ ## adding the layers for chain branch
+ output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+ # adding the layers for xent branch
+ # This block prints the configs for a separate output that will be
+ # trained with a cross-entropy objective in the 'chain' models... this
+ # has the effect of regularizing the hidden parts of the model. we use
+ # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+ # 0.5 / args.xent_regularize is suitable as it means the xent
+ # final-layer learns at a rate independent of the regularization
+ # constant; and the 0.5 was tuned so as to make the relative progress
+ # similar in the xent and regular final layers.
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 18 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/chain/train.py --stage $train_stage \
+ --cmd "$decode_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient 0.1 \
+ --chain.l2-regularize 0.00005 \
+ --chain.apply-deriv-weights false \
+ --chain.lm-opts="--num-extra-lm-states=2000" \
+ --egs.dir "$common_egs_dir" \
+ --egs.opts "--frames-overlap-per-eg 0" \
+ --egs.chunk-width "$frames_per_chunk" \
+ --egs.chunk-left-context "$chunk_left_context" \
+ --egs.chunk-right-context "$chunk_right_context" \
+ --trainer.num-chunk-per-minibatch 64,32 \
+ --trainer.frames-per-iter 1500000 \
+ --trainer.max-param-change 2.0 \
+ --trainer.num-epochs 6 \
+ --trainer.deriv-truncate-margin 10 \
+ --trainer.optimization.shrink-value 0.99 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 12 \
+ --trainer.optimization.initial-effective-lrate 0.001 \
+ --trainer.optimization.final-effective-lrate 0.0001 \
+ --trainer.optimization.momentum 0.0 \
+ --cleanup.remove-egs true \
+ --feat-dir $train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir $lat_dir \
+ --dir $dir
+fi
+
+
+
+if [ $stage -le 19 ]; then
+ # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
+ # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+ # the lang directory.
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test_3_no_al $dir $dir/graph
+fi
+
+if [ $stage -le 20 ]; then
+ steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" --stage 3\
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --extra-left-context $extra_left_context \
+ --extra-right-context $extra_right_context \
+ --frames-per-chunk "$frames_per_chunk" \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \
+ --scoring-opts "--min-lmwt 5 --max_lmwt 15" \
+ $dir/graph data/test_hires $dir/decode_bn || exit 1;
+fi
+exit 0
diff --git a/egs/gale_arabic/s5d/local/check_tools.sh b/egs/gale_arabic/s5d/local/check_tools.sh
new file mode 100755
index 00000000000..448a6536946
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/check_tools.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# check whether bs4 and lxml is installed
+if ! python3 -c "import bs4" 2>/dev/null; then
+ echo "$0: BeautifulSoup4 not installed, you can install it by 'pip install beautifulsoup4' if you prefer to use python to process xml file"
+ exit 1;
+fi
+
+if ! python3 -c "import lxml" 2>/dev/null; then
+ echo "$0: lxml not installed, you can install it by 'pip install lxml' if you prefer to use python to process xml file"
+ exit 1;
+fi
+
+echo "both BeautifulSoup4 and lxml are installed in python"
+exit 0
diff --git a/egs/gale_arabic/s5d/local/check_vocab.py b/egs/gale_arabic/s5d/local/check_vocab.py
new file mode 100755
index 00000000000..57ec32285bf
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/check_vocab.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+
+import sys
+
+def get_vocab_set(ref_file):
+ vocab_set = set()
+ with open(ref_file, 'r') as f:
+ for line in f.readlines():
+ word = line.split()[0]
+ vocab_set.add(word)
+ return vocab_set
+
+
+def compare(vocab_set, wordlist):
+ with open(wordlist, 'r') as f:
+ for line in f.readlines():
+ word = line.split()[0]
+ if word not in vocab_set:
+ print(word)
+
+def main():
+ ref_file = sys.argv[1]
+ wordlist = sys.argv[2]
+ vocab_set = get_vocab_set(ref_file)
+ compare(vocab_set, wordlist)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/gale_arabic/s5d/local/eng2arabic.pl b/egs/gale_arabic/s5d/local/eng2arabic.pl
new file mode 100755
index 00000000000..2fea8d33211
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/eng2arabic.pl
@@ -0,0 +1,108 @@
+#!/usr/bin/env perl
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+use warnings;
+use strict;
+use Encode;
+use utf8;
+
+
+
+if (@ARGV !=2 )
+ {#
+ print "usage: $0 <inFile> <outFile>\n";
+ exit (1);
+ }
+
+# <\check usage>
+my $inFile = shift (@ARGV);
+my $ouFile = shift(@ARGV);
+
+
+open INFILE, "<$inFile" || die "unable to open the input file $inFile\n";
+binmode INFILE, ":encoding(utf8)";
+
+
+open OUTPUTFILE, ">$ouFile" or die "unable to open the output mlf file $ouFile\n";
+binmode OUTPUTFILE, ":encoding(utf8)";
+
+while (<INFILE>) {
+ my $BW = convertUTF8ToBuckwalter ($_);
+ print OUTPUTFILE "$BW";
+}
+close INFILE;
+close OUTPUTFILE;
+
+
+
+# this function is copied from MADATools.pm: MADA Tools
+ sub convertUTF8ToBuckwalter {
+
+ my ($line)= (@_);
+ $line =~ s/\'/\x{0621}/g; ## HAMZA
+ $line =~ s/\|/\x{0622}/g; ## ALEF WITH MADDA ABOVE
+ $line =~ s/\>/\x{0623}/g; ## ALEF WITH HAMZA ABOVE
+ $line =~ s/\&/\x{0624}/g; ## WAW WITH HAMZA ABOVE
+ $line =~ s/\</\x{0625}/g; ## ALEF WITH HAMZA BELOW
+ $line =~ s/\}/\x{0626}/g; ## YEH WITH HAMZA ABOVE
+ $line =~ s/A/\x{0627}/g; ## ALEF
+ $line =~ s/b/\x{0628}/g; ## BEH
+ $line =~ s/p/\x{0629}/g; ## TEH MARBUTA
+ $line =~ s/t/\x{062A}/g; ## TEH
+ $line =~ s/v/\x{062B}/g; ## THEH
+ $line =~ s/j/\x{062C}/g; ## JEEM
+ $line =~ s/H/\x{062D}/g; ## HAH
+ $line =~ s/x/\x{062E}/g; ## KHAH
+ $line =~ s/d/\x{062F}/g; ## DAL
+ $line =~ s/\*/\x{0630}/g; ## THAL
+ $line =~ s/r/\x{0631}/g; ## REH
+ $line =~ s/z/\x{0632}/g; ## ZAIN
+ $line =~ s/s/\x{0633}/g; ## SEEN
+ $line =~ s/\$/\x{0634}/g; ## SHEEN
+ $line =~ s/S/\x{0635}/g; ## SAD
+ $line =~ s/D/\x{0636}/g; ## DAD
+ $line =~ s/T/\x{0637}/g; ## TAH
+ $line =~ s/Z/\x{0638}/g; ## ZAH
+ $line =~ s/E/\x{0639}/g; ## AIN
+ $line =~ s/g/\x{063A}/g; ## GHAIN
+ $line =~ s/_/\x{0640}/g; ## TATWEEL
+ $line =~ s/f/\x{0641}/g; ## FEH
+ $line =~ s/q/\x{0642}/g; ## QAF
+ $line =~ s/k/\x{0643}/g; ## KAF
+ $line =~ s/l/\x{0644}/g; ## LAM
+ $line =~ s/m/\x{0645}/g; ## MEEM
+ $line =~ s/n/\x{0646}/g; ## NOON
+ $line =~ s/h/\x{0647}/g; ## HEH
+ $line =~ s/w/\x{0648}/g; ## WAW
+ $line =~ s/Y/\x{0649}/g; ## ALEF MAKSURA
+ $line =~ s/y/\x{064A}/g; ## YEH
+
+ ## Diacritics
+ $line =~ s/F/\x{064B}/g; ## FATHATAN
+ $line =~ s/N/\x{064C}/g; ## DAMMATAN
+ $line =~ s/K/\x{064D}/g; ## KASRATAN
+ $line =~ s/a/\x{064E}/g; ## FATHA
+ $line =~ s/u/\x{064F}/g; ## DAMMA
+ $line =~ s/i/\x{0650}/g; ## KASRA
+ $line =~ s/\~/\x{0651}/g; ## SHADDA
+ $line =~ s/o/\x{0652}/g; ## SUKUN
+ $line =~ s/\`/\x{0670}/g; ## SUPERSCRIPT ALEF
+
+ $line =~ s/\{/\x{0671}/g; ## ALEF WASLA
+ $line =~ s/P/\x{067E}/g; ## PEH
+ $line =~ s/J/\x{0686}/g; ## TCHEH
+ $line =~ s/V/\x{06A4}/g; ## VEH
+ $line =~ s/G/\x{06AF}/g; ## GAF
+
+
+ ## Punctuation should really be handled by the utf8 cleaner or other method
+
+
+
+
+
+
+ return $line;
+}
diff --git a/egs/gale_arabic/s5d/local/gale_train_lms.sh b/egs/gale_arabic/s5d/local/gale_train_lms.sh
new file mode 100755
index 00000000000..be0b4ad8f79
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/gale_train_lms.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+# Copyright 2013 Arnab Ghoshal
+# Johns Hopkins University (author: Daniel Povey)
+# 2014 Guoguo Chen
+# 2019 Dongji Gao
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# To be run from one directory above this script.
+
+# Begin configuration section.
+weblm=
+# end configuration sections
+
+help_message="Usage: $0 [options] [giga-dirs]
+Train language models for GALE Arabic, and optionally for Gigaword.\n
+options:
+ --help # print this message and exit
+";
+
+. utils/parse_options.sh
+
+if [ $# -lt 3 ]; then
+ printf "$help_message\n";
+ exit 1;
+fi
+
+text=$1 # data/local/train/text
+lexicon=$2 # data/local/dict/lexicon.txt
+dir=$3 # data/local/lm
+
+shift 3
+giga_dirs=( $@ )
+
+for f in "$text" "$lexicon"; do
+ [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
+done
+
+loc=`which ngram-count`;
+if [ -z $loc ]; then
+ if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
+ sdir=`pwd`/../../../tools/srilm/bin/i686-m64
+ else
+ sdir=`pwd`/../../../tools/srilm/bin/i686
+ fi
+ if [ -f $sdir/ngram-count ]; then
+ echo Using SRILM tools from $sdir
+ export PATH=$PATH:$sdir
+ else
+ echo You appear to not have SRILM tools installed, either on your path,
+ echo or installed in $sdir. See tools/install_srilm.sh for installation
+ echo instructions.
+ exit 1
+ fi
+fi
+
+stage=0
+
+set -o errexit
+mkdir -p $dir
+export LC_ALL=C
+
+heldout_sent=10000
+cut -d' ' -f2- $text | gzip -c > $dir/train.all.gz
+cut -d' ' -f2- $text | tail -n +$heldout_sent | gzip -c > $dir/train.gz
+cut -d' ' -f2- $text | head -n $heldout_sent > $dir/heldout
+
+cut -d' ' -f1 $lexicon > $dir/wordlist
+
+if [ $stage -le 1 ]; then
+ # Trigram language model
+ echo "training tri-gram lm"
+ smoothing="kn"
+ ngram-count -text $dir/train.gz -order 3 -limit-vocab -vocab $dir/wordlist \
+ -unk -map-unk "" -${smoothing}discount -interpolate -lm $dir/gale.o3g.${smoothing}.gz
+ echo "PPL for GALE Arabic trigram LM:"
+ ngram -unk -lm $dir/gale.o3g.${smoothing}.gz -ppl $dir/heldout
+ ngram -unk -lm $dir/gale.o3g.${smoothing}.gz -ppl $dir/heldout -debug 2 >& $dir/3gram.${smoothing}.ppl2
+
+ # 4gram language model
+ echo "training 4-gram lm"
+ ngram-count -text $dir/train.gz -order 4 -limit-vocab -vocab $dir/wordlist \
+ -unk -map-unk "" -${smoothing}discount -interpolate -lm $dir/gale.o4g.${smoothing}.gz
+ echo "PPL for GALE Arabic 4gram LM:"
+ ngram -unk -lm $dir/gale.o4g.${smoothing}.gz -ppl $dir/heldout
+ ngram -unk -lm $dir/gale.o4g.${smoothing}.gz -ppl $dir/heldout -debug 2 >& $dir/4gram.${smoothing}.ppl2
+fi
+
+if [ ! -z $giga_dirs ]; then
+ mkdir -p $dir/giga
+ if [ ! -f $giga_dirs/text.2000k ]; then
+ echo "Arabic Gigaword text not found, prepare it"
+ local/prepare_giga.sh $giga_dirs
+ fi
+
+ cp $giga_dirs/text.2000k $dir/giga
+ cat $dir/giga/text.2000k | gzip -c > $dir/giga/text2000k.gz
+
+ for x in 3 4; do
+ smoothing="kn"
+ ngram-count -text $dir/giga/text2000k.gz -order $x -limit-vocab \
+ -vocab $dir/wordlist -unk -map-unk "" -${smoothing}discount -interpolate \
+ -lm $dir/giga/giga.o${x}g.${smoothing}.gz
+ echo "PPL for Gigaword ${x}gram LM:"
+ ngram -unk -lm $dir/giga/giga.o${x}g.${smoothing}.gz -ppl $dir/heldout
+ ngram -unk -lm $dir/giga/giga.o${x}g.${smoothing}.gz -ppl $dir/heldout -debug 2 \
+ >& $dir/giga/${x}gram.${smoothing}.ppl2
+ compute-best-mix $dir/${x}gram.${smoothing}.ppl2 \
+ $dir/giga/${x}gram.${smoothing}.ppl2 >& $dir/gale_giga_mix.${x}gram.${smoothing}.log
+ grep 'best lambda' $dir/gale_giga_mix.${x}gram.${smoothing}.log | perl -e '
+ $_=<>;
+ s/.*\(//; s/\).*//;
+ @A = split;
+ die "Expecting 2 numbers; found: $_" if(@A!=2);
+ print "$A[0]\n$A[1]\n";' > $dir/gale_giga_mix.${x}gram.${smoothing}.weights
+ gale_weight=$(head -1 $dir/gale_giga_mix.${x}gram.${smoothing}.weights)
+ giga_weight=$(tail -n 1 $dir/gale_giga_mix.${x}gram.${smoothing}.weights)
+ ngram -order $x -lm $dir/gale.o${x}g.${smoothing}.gz -lambda $gale_weight \
+ -mix-lm $dir/giga/giga.o${x}g.${smoothing}.gz \
+ -unk -write-lm $dir/gale_giga.o${x}g.${smoothing}.gz
+ echo "PPL for GALE + Gigaword ${x}gram LM:"
+ ngram -unk -lm $dir/gale_giga.o${x}g.${smoothing}.gz -ppl $dir/heldout
+ done
+fi
diff --git a/egs/gale_arabic/s5d/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5d/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..31c58ef06b4
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,182 @@
+#!/usr/bin/env bash
+
+set -e -o pipefail
+
+# This script is called from scripts like local/nnet3/run_tdnn.sh and
+# local/chain/run_tdnn.sh (and may eventually be called by more scripts). It
+# contains the common feature preparation and iVector-related parts of the
+# script. See those scripts for examples of usage.
+
+
+stage=0
+nj=100
+train_set=train # you might set this to e.g. train.
+test_sets="test"
+gmm=tri3b # This specifies a GMM-dir from the features of the type you're training the system on;
+ # it should contain alignments for 'train_set'.
+
+num_threads_ubm=32
+nnet3_affix= # affix for exp/nnet3 directory to put iVector stuff
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+
+gmm_dir=exp/${gmm}
+ali_dir=exp/${gmm}_ali_${train_set}_sp
+
+for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do
+ if [ ! -f $f ]; then
+ echo "$0: expected file $f to exist"
+ exit 1
+ fi
+done
+
+
+
+if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then
+ echo "$0: data/${train_set}_sp_hires/feats.scp already exists."
+ echo " ... Please either remove it, or rerun this script with stage > 2."
+ exit 1
+fi
+
+
+if [ $stage -le 1 ]; then
+ echo "$0: preparing directory for speed-perturbed data"
+ utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating high-resolution MFCC features"
+
+ # this shows how you can split across multiple file-systems. we'll split the
+ # MFCC dir across multiple locations. You might want to be careful here, if you
+ # have multiple copies of Kaldi checked out and run the same recipe, not to let
+ # them overwrite each other.
+ mfccdir=data/${train_set}_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/gale_arabic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" --write-utt2dur false data/${datadir}_hires
+ steps/compute_cmvn_stats.sh data/${datadir}_hires
+ utils/fix_data_dir.sh data/${datadir}_hires
+ done
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: computing a subset of data to train the diagonal UBM."
+ mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
+ temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
+
+ # train a diagonal UBM using a subset of about a quarter of the data
+ num_utts_total=$(wc -l /dev/null || true
+ steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \
+ --extra-left-context $extra_left_context \
+ --extra-right-context $extra_right_context \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \
+ ${graph_dir} data/test_hires ${dir}/decode || exit 1
+ steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+ data/test_hires ${dir}/decode_test ${dir}/decode_test_rescore || exit 1
+fi
+
+exit 0;
diff --git a/egs/gale_arabic/s5d/local/nnet3/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5d/local/nnet3/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..c624d4e8535
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/nnet3/tuning/run_tdnn_1a.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+
+# started from tedlium recipe with few edits
+
+set -e -o pipefail -u
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+train_set=train
+gmm=tri2b # this is the source gmm-dir for the data-type of interest; it
+ # should have alignments for the specified training data.
+num_threads_ubm=32
+nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned
+tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the configuration.
+
+# Options which are not passed through to run_ivector_common.sh
+train_stage=-10
+splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0"
+remove_egs=true
+relu_dim=850
+num_epochs=3
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat < \n";
+ exit (1);
+ }
+
+# <\check usage>
+my $inFile = shift (@ARGV);
+my $ouFile = shift(@ARGV);
+
+
+open INFILE, "<$inFile" || die "unable to open the input file $inFile\n";
+binmode INFILE, ":encoding(utf8)";
+
+
+open OUTPUTFILE, ">$ouFile" or die "unable to open the output mlf file $ouFile\n";
+binmode OUTPUTFILE, ":encoding(utf8)";
+
+
+while () {
+ s/[^اأإآبتثجحخدذرزسشصضطظعغفقكلمنهويىئءؤة0-9]+/ /g; ## Removes non Arabic or numbers
+# s/[^0-9]/ /g;
+# $_ =~ s/[^اأإآبتثجحخدذرزسشصضطظعغفقكلمنهويىئءؤة0-9]+/ /g; ## Removes non Arabic or numbers
+# s/[0-9]+//g;
+ my $BW = convertUTF8ToBuckwalter ($_);
+ print OUTPUTFILE "$BW"."\n";
+}
+close INFILE;
+close OUTPUTFILE;
+
+
+
+# this function is copied from MADATools.pm: MADA Tools
+ sub convertUTF8ToBuckwalter {
+
+ my ($line)= (@_);
+#$line = $UTF8_ENCODING_OBJ->decode($line); ## Same as Encode::decode("utf8",$line), but faster since object already created
+#$line =~ s/[^اأإآبتثجحخدذرزسشصضطظعغفقكلمنهويىئءؤة0-9]+//g; ## Removes non Arabic or numbers
+# $line =~ s/[0-9]//g;
+ $line =~ s/\x{0621}/\'/g; ## HAMZA
+ $line =~ s/\x{0622}/\|/g; ## ALEF WITH MADDA ABOVE
+ $line =~ s/\x{0623}/\>/g; ## ALEF WITH HAMZA ABOVE
+ $line =~ s/\x{0624}/\&/g; ## WAW WITH HAMZA ABOVE
+ $line =~ s/\x{0625}/\ ${arabic_giga_dir}/${dest_file}.orig
+#done
+
+#for x in $arabic_giga_dir/*.orig; do
+# echo "Processing $x"
+# local/arabic_convert.py $x > ${x}.mid
+#done
+
+for x in $arabic_giga_dir/*.mid; do
+ echo "Processing $x"
+ local/normalize_transcript_BW.pl $x ${x}.norm
+done
diff --git a/egs/gale_arabic/s5d/local/prepare_data.sh b/egs/gale_arabic/s5d/local/prepare_data.sh
new file mode 100755
index 00000000000..d09ff00acd0
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_data.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+# GALE Arabic phase 2 Conversation Speech
+dir1=/export/corpora/LDC/LDC2013S02/
+dir2=/export/corpora/LDC/LDC2013S07/
+text1=/export/corpora/LDC/LDC2013T04/
+text2=/export/corpora/LDC/LDC2013T17/
+# GALE Arabic phase 2 News Speech
+dir3=/export/corpora/LDC/LDC2014S07/
+dir4=/export/corpora/LDC/LDC2015S01/
+text3=/export/corpora/LDC/LDC2014T17/
+text4=/export/corpora/LDC/LDC2015T01/
+# GALE Arabic phase 3 Conversation Speech
+dir5=/export/corpora/LDC/LDC2015S11/
+dir6=/export/corpora/LDC/LDC2016S01/
+text5=/export/corpora/LDC/LDC2015T16/
+text6=/export/corpora/LDC/LDC2016T06/
+# GALE Arabic phase 3 News Speech
+dir7=/export/corpora/LDC/LDC2016S07/
+dir8=/export/corpora/LDC/LDC2017S02/
+text7=/export/corpora/LDC/LDC2016T17/
+text8=/export/corpora/LDC/LDC2017T04/
+# GALE Arabic phase 4 Conversation Speech
+dir9=/export/corpora/LDC/LDC2017S15/
+text9=/export/corpora/LDC/LDC2017T12/
+# GALE Arabic phase 4 News Speech
+dir10=/export/corpora/LDC/LDC2018S05/
+text10=/export/corpora/LDC/LDC2018T14/
+
+mgb2_dir=""
+process_xml=""
+mer=80
+
+. ./utils/parse_options.sh
+
+gale_data=GALE
+
+mkdir -p $gale_data
+# check that sox is installed
+which sox &>/dev/null
+if [[ $? != 0 ]]; then
+ echo "$0: sox is not installed"; exit 1
+fi
+
+for dvd in $dir1 $dir2 $dir3 $dir4 $dir5 $dir6 $dir7 $dir8 $dir9 $dir10; do
+ dvd_full_path=$(utils/make_absolute.sh $dvd)
+ if [[ ! -e $dvd_full_path ]]; then
+ echo "$0: missing $dvd_full_path"; exit 1;
+ fi
+ find $dvd_full_path \( -name "*.wav" -o -name "*.flac" \) | while read file; do
+ id=$(basename $file | awk '{gsub(".wav","");gsub(".flac","");print}')
+ echo "$id sox $file -r 16000 -t wav - |"
+ done
+done | sort -u > $gale_data/wav.scp
+echo "$0:data prep audio succeded"
+
+gale_data=$(utils/make_absolute.sh "GALE" );
+top_pwd=`pwd`
+txtdir=$gale_data/txt
+mkdir -p $txtdir; cd $txtdir
+
+for cdx in $text1 $text2 $text3 $text4 $text5 $text6 $text7 $text8 $text9 $text10; do
+ echo "$0:Preparing $cdx"
+ if [[ $cdx == *.tgz ]] ; then
+ tar -xvf $cdx
+ elif [ -d "$cdx" ]; then
+ ln -s $cdx `basename $cdx`
+ else
+ echo "$0:I don't really know what I shall do with $cdx " >&2
+ fi
+done
+
+find -L . -type f -name "*.tdf" | while read file; do
+sed '1,3d' $file # delete the first 3 lines
+done > all.tmp$$
+
+perl -e '
+ ($inFile,$idFile,$txtFile)= split /\s+/, $ARGV[0];
+ open(IN, "$inFile");
+ open(ID, ">$idFile");
+ open(TXT, ">$txtFile");
+ while () {
+ @arr= split /\t/,$_;
+ $start=sprintf ("%0.3f",$arr[2]);$rStart=$start;$start=~s/\.//; $start=~s/^0+$/0/; $start=~s/^0+([^0])/$1/; # remove zeros at the beginning
+ $end=sprintf ("%0.3f",$arr[3]);$rEnd=$end;$end=~s/^0+([^0])/$1/;$end=~s/\.//;
+ if ( ($arr[11] !~ m/report/) && ($arr[11] !~ m/conversational/) ){$arr[11]="UNK";}
+ $id="$arr[11] $arr[0] $arr[0]_${start}_${end} $rStart $rEnd\n";
+ next if ($rStart == $rEnd);
+ $id =~ s/.sph//g;
+ print ID $id;
+ print TXT "$arr[7]\n";
+ }' "all.tmp$$ allid.tmp$$ contentall.tmp$$"
+
+perl ${top_pwd}/local/normalize_transcript_BW.pl contentall.tmp$$ contentall.buck.tmp$$
+paste allid.tmp$$ contentall.buck.tmp$$ | sed 's: $::' | awk '{if (NF>5) {print $0}}' > all_1.tmp$$
+
+
+awk '{$1="";print $0}' all_1.tmp$$ | sed 's:^ ::' > $gale_data/all
+awk '{if ($1 == "report") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $gale_data/report
+awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $gale_data/conversational
+
+cd ..;
+rm -fr $txtdir
+cd $top_pwd
+
+# prepare MGB2 data
+if [ ! -z $mgb2_dir ]; then
+ echo "preparing MGB2 data"
+
+ xmldir=$mgb2_dir/train/xml/bw
+ output_dir=$gale_data/mgb2
+ mkdir -p $output_dir
+
+ if [ -f $output_dir/wav.scp ]; then
+ mkdir -p $output_dir/.backup
+ mv $output_dir/wav.scp ${output_dir}/.backup
+ mv $output_dir/mgb2 ${output_dir}/.backup
+ fi
+
+ if [ $process_xml == 'python' ]; then
+ echo "using python to process xml file"
+ # check if bs4 and lxml are installed in python
+ local/check_tools.sh
+ ls $mgb2_dir/train/wav/ | while read name; do
+ basename=`basename -s .wav $name`
+ [ ! -e $xmldir/$basename.xml ] && echo "Missing $xmldir/$basename.xml" && exit 1
+ local/process_xml.py $xmldir/$basename.xml - | local/add_to_datadir.py $basename $output_dir $mer
+ echo $basename $mgb2_dir/train/wav/$basename.wav >> $output_dir/wav.scp
+ done
+ elif [ $process_xml == 'xml' ]; then
+ # check if xml binary exsits
+ if command -v xml >/dev/null 2>/dev/null; then
+ echo "using xml"
+ ls $mgb2_dir/train/wav/ | while read name; do
+ basename=`basename -s .wav $name`
+ [ ! -e $xmldir/$basename.xml ] && echo "Missing $xmldir/$basename.xml" && exit 1
+ xml sel -t -m '//segments[@annotation_id="transcript_align"]' -m "segment" -n -v "concat(@who,' ',@starttime,' ',@endtime,' ',@WMER,' ')" -m "element" -v "concat(text(),' ')" $xmldir/$basename.xml | local/add_to_datadir.py $basename $output_dir $mer
+ echo $basename $mgb2_dir/train/wav/$basename.wav >> $output_dir/wav.scp
+ done
+ else
+ echo "xml not found, you may use python by '--process-xml python'"
+ exit 1;
+ fi
+ else
+ # invalid option
+ echo "$0: invalid option for --process-xml, choose from 'xml' or 'python'"
+ exit 1;
+ fi
+
+ # add mgb2 data to training data (GALE/all and wav.scp)
+ mv $gale_data/all $gale_data/all.gale
+ cat $gale_data/all.gale $output_dir/mgb2 > $gale_data/all
+ cat $output_dir/wav.scp >> $gale_data/wav.scp
+
+ # for dict preparation
+ grep -v -f local/test/dev_all $gale_data/all.gale | \
+ grep -v -f local/test/test_p2 | \
+ grep -v -f local/test/mt_eval_all | \
+ grep -v -f local/bad_segments > $gale_data/all.gale.train
+ awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $gale_data/all.gale.train | sort -u > $gale_data/gale_text
+echo "$0:MGB2 data added to training data"
+fi
+
+
+echo "$0:data prep text succeeded"
+
+mkdir -p data
+dir=$(utils/make_absolute.sh data/)
+grep -f local/test/dev_all $gale_data/all | grep -v -f local/bad_segments > $gale_data/all.dev
+grep -f local/test/test_p2 $gale_data/all | grep -v -f local/bad_segments > $gale_data/all.test_p2
+grep -f local/test/mt_eval_all $gale_data/all | grep -v -f local/bad_segments > $gale_data/all.mt_all
+grep -v -f local/test/dev_all $gale_data/all | \
+ grep -v -f local/test/test_p2 | \
+ grep -v -f local/test/mt_eval_all | \
+ grep -v -f local/bad_segments > $gale_data/all.train
+
+for x in dev test_p2 mt_all train; do
+ outdir=data/$x
+ file=$gale_data/all.$x
+ mkdir -p $outdir
+ awk '{print $2 " " $2}' $file | sort -u > $outdir/utt2spk
+ cp -pr $outdir/utt2spk $outdir/spk2utt
+ awk '{print $2 " " $1 " " $3 " " $4}' $file | sort -u > $outdir/segments
+ awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $file | sort -u > $outdir/text
+done
+
+grep -f local/test/dev_all $gale_data/wav.scp > $dir/dev/wav.scp
+grep -f local/test/test_p2 $gale_data/wav.scp > $dir/test_p2/wav.scp
+grep -f local/test/mt_eval_all $gale_data/wav.scp > $dir/mt_all/wav.scp
+
+cat $gale_data/wav.scp | awk -v seg=$dir/train/segments 'BEGIN{while((getline0) {seen[$2]=1;}}
+ {if (seen[$1]) { print $0}}' > $dir/train/wav.scp
+
+echo "$0:data prep split succeeded"
+exit 0
diff --git a/egs/gale_arabic/s5d/local/prepare_dict.sh b/egs/gale_arabic/s5d/local/prepare_dict.sh
new file mode 100755
index 00000000000..31bae09ff31
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_dict.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+# Copyright 2017 QCRI (author: Ahmed Ali)
+# Apache 2.0
+# This script prepares the dictionary.
+
+set -e
+dir=data/local/dict
+lexicon_url1="http://alt.qcri.org//resources/speech/dictionary/ar-ar_grapheme_lexicon_2016-02-09.bz2";
+lexicon_url2="http://alt.qcri.org//resources/speech/dictionary/ar-ar_lexicon_2014-03-17.txt.bz2";
+stage=0
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh || exit 1;
+mkdir -p $dir data/local/lexicon_data
+
+if [ $stage -le 0 ]; then
+ echo "$0: Downloading text for lexicon... $(date)."
+# wget -P data/local/lexicon_data $lexicon_url1
+# wget -P data/local/lexicon_data $lexicon_url2
+# bzcat data/local/lexicon_data/ar-ar_grapheme_lexicon_2016-02-09.bz2 | sed '1,3d' | awk '{print $1}' > data/local/lexicon_data/grapheme_lexicon
+# bzcat data/local/lexicon_data/ar-ar_lexicon_2014-03-17.txt.bz2 | sed '1,3d' | awk '{print $1}' >> data/local/lexicon_data/grapheme_lexicon
+ gale_data=GALE
+ text=data/train/text
+ [ -f $gale_data/gale_text ] && text=$gale_data/gale_text
+ echo "text is $text"
+ cat $text | cut -d ' ' -f 2- | tr -s " " "\n" | sort -u >> data/local/lexicon_data/grapheme_lexicon
+fi
+
+
+if [ $stage -le 0 ]; then
+ echo "$0: processing lexicon text and creating lexicon... $(date)."
+ # remove vowels and rare alef wasla
+ grep -v [0-9] data/local/lexicon_data/grapheme_lexicon | sed -e 's:[FNKaui\~o\`]::g' -e 's:{:}:g' | sort -u > data/local/lexicon_data/processed_lexicon
+ local/prepare_lexicon.py
+fi
+
+cut -d' ' -f2- $dir/lexicon.txt | sed 's/SIL//g' | tr ' ' '\n' | sort -u | sed '/^$/d' >$dir/nonsilence_phones.txt || exit 1;
+
+sed -i '1i UNK' $dir/lexicon.txt
+
+echo UNK >> $dir/nonsilence_phones.txt
+
+echo ' SIL' >> $dir/lexicon.txt
+
+echo SIL > $dir/silence_phones.txt
+
+echo SIL >$dir/optional_silence.txt
+
+echo -n "" >$dir/extra_questions.txt
+
+echo "$0: Dictionary preparation succeeded"
diff --git a/egs/gale_arabic/s5d/local/prepare_giga.sh b/egs/gale_arabic/s5d/local/prepare_giga.sh
new file mode 100755
index 00000000000..f7345803274
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_giga.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+giga_dir=$1
+
+source_dir=/export/corpora/LDC/LDC2011T11/arb_gw_5
+num=2000000
+suffix="2000k"
+
+[ ! -d $source_dir ] && echo "source Arabic Gigaword does not exist." && exit 1;
+
+[ -f $giga_dir/text ] && mv $giga_dir/text $giga_dir/text.bkp
+mkdir -p $giga_dir/
+
+find $source_dir/data/ -name "*.gz" | while read file; do
+ gunzip -c $file | local/arabic_convert.py - >> $giga_dir/text.arb
+done
+
+head -n $num $giga_dir/text.arb > $giga_dir/text.arb.${suffix}
+local/normalize_transcript_BW.pl $giga_dir/text.arb.${suffix} $giga_dir/text.${suffix}
+
+echo "finish preparing Arabic Gigaword"
+exit 0
diff --git a/egs/gale_arabic/s5d/local/prepare_lexicon.py b/egs/gale_arabic/s5d/local/prepare_lexicon.py
new file mode 100755
index 00000000000..215541585eb
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_lexicon.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+# Copyright 2018 Ashish Arora
+# Apache 2.0
+
+# This script prepares lexicon.
+
+import argparse
+import os
+
+parser = argparse.ArgumentParser(description="""Creates the list of characters and words in lexicon""")
+args = parser.parse_args()
+
+### main ###
+lex = {}
+text_path = os.path.join('data','local', 'lexicon_data', 'processed_lexicon')
+with open(text_path, 'r', encoding='utf-8') as f:
+ for line in f:
+ line = line.strip()
+ characters = list(line)
+ characters = " ".join(['V' if char == '*' else char for char in characters])
+ lex[line] = characters
+
+with open(os.path.join('data','local','dict', 'lexicon.txt'), 'w', encoding='utf-8') as fp:
+ for key in sorted(lex):
+ fp.write(key + " " + lex[key] + "\n")
diff --git a/egs/gale_arabic/s5d/local/prepare_lm.sh b/egs/gale_arabic/s5d/local/prepare_lm.sh
new file mode 100755
index 00000000000..a4f38a3da13
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_lm.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+
+# Copyright 2012 Vassil Panayotov
+# 2017 Ewald Enzinger
+# Apache 2.0
+
+. ./path.sh || exit 1
+
+echo "=== Building a language model ..."
+
+dir=data/local/lm/
+text=data/train/text
+lexicon=data/local/dict/lexicon.txt
+arabic_giga_dir=Arabic_giga
+# Language model order
+order=4
+
+. utils/parse_options.sh
+
+# Prepare a LM training corpus from the transcripts
+mkdir -p $dir
+
+for f in "$text" "$lexicon"; do
+ [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
+done
+
+loc=`which ngram-count`;
+if [ -z $loc ]; then
+ if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
+ sdir=$KALDI_ROOT/tools/srilm/bin/i686-m64
+ else
+ sdir=$KALDI_ROOT/tools/srilm/bin/i686
+ fi
+ if [ -f $sdir/ngram-count ]; then
+ echo Using SRILM tools from $sdir
+ export PATH=$PATH:$sdir
+ else
+ echo You appear to not have SRILM tools installed, either on your path,
+ echo or installed in $sdir. See tools/install_srilm.sh for installation
+ echo instructions.
+ exit 1
+ fi
+fi
+
+
+#cat Arabic_giga/text.1000000 > $dir/text.txt
+[ -f $dir/text.txt ] && rm $dir/text.txt && echo "deleted"
+cat data/train/text | cut -d " " -f2- > $dir/text.txt
+cat Arabic_giga/text.all >> $dir/text.txt
+echo "text.txt contains `wc -l $dir/text.txt` lines"
+cut -d' ' -f1 $lexicon > $dir/wordlist
+
+ngram-count -text $dir/text.txt -order $order -limit-vocab -vocab $dir/wordlist \
+ -unk -map-unk "" -kndiscount -interpolate -lm $dir/lm.$order.all.gz
+
+ngram -lm $dir/lm.$order.all.gz -ppl $dir/dev.txt
+echo "*** Finished building the LM model!"
diff --git a/egs/gale_arabic/s5d/local/prepare_lm_pocolm.sh b/egs/gale_arabic/s5d/local/prepare_lm_pocolm.sh
new file mode 100755
index 00000000000..e85f97d2faf
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/prepare_lm_pocolm.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+
+# Dongji Gao
+
+set -e
+set -o pipefail
+set -u
+
+stage=0
+
+dir=data/local/pocolm
+cmd=run.pl
+order=4
+extra_text=""
+
+. ./utils/parse_options.sh
+
+lm_dir=${dir}/data
+lm_name=10m_${order}
+
+mkdir -p $dir
+. ./path.sh
+export PATH=$KALDI_ROOT/tools/pocolm/scripts:$PATH
+
+if [ $stage -le 0 ]; then
+ mkdir -p ${dir}/data
+ mkdir -p ${dir}/data/text
+
+ echo "$0: Getting the data sources"
+
+ rm ${dir}/data/text/* 2>/dev/null || true
+
+ cat data/dev/text | cut -d ' ' -f2- > ${dir}/data/text/dev.txt
+ cat data/train/text | cut -d ' ' -f2- > ${dir}/data/text/train.txt
+ [ ! -z $extra_text ] && [ -f $extra_text ] && cp $extra_text ${dir}/data/text/giga.txt
+# cp temp/text.2000k ${dir}/data/text/arb_giga_2000k.txt
+fi
+
+if [ $stage -le 1 ]; then
+ mkdir -p ${dir}/data/work
+ if [ ! -f ${dir}/data/work/word_counts/.done ]; then
+ get_word_counts.py ${dir}/data/text ${dir}/data/work/word_counts
+ touch ${dir}/data/work/word_counts/.done
+ fi
+fi
+
+lexicon=data/local/dict/lexicon.txt
+[ ! -f $lexicon ] && echo "$0: No such file $lexicon" && exit 1;
+
+wordlist=${dir}/data/work/wordlist
+if [ $stage -le 2 ]; then
+ cut -d ' ' -f1 $lexicon > $wordlist
+ wordlist_to_vocab.py --unk-symbol="" $wordlist > ${dir}/data/work/vocab_wordlist.txt
+ touch ${dir}/data/work/.vocab_wordlist.txt.done
+fi
+
+unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm
+echo "$unpruned_lm_dir"
+
+if [ $stage -le 3 ]; then
+ echo "$0: training the unpruned LM"
+ $cmd ${unpruned_lm_dir}/log/train.log \
+ train_lm.py --wordlist=$wordlist --num-split=20 --warm-start-ratio=20 \
+ --limit-unk-history=false \
+ ${dir}/data/text $order ${lm_dir}/work ${unpruned_lm_dir}
+
+ for x in dev; do
+ $cmd ${unpruned_lm_dir}/log/compute_data_prob_${x}.log \
+ get_data_prob.py ${dir}/data/text/${x}.txt ${unpruned_lm_dir}
+ cat ${unpruned_lm_dir}/log/compute_data_prob_${x}.log | grep -F '[perplexity'
+ done
+
+ format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > pocolm/lm.$order.gz
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: pruning the LM (to larger size)"
+ size=100000000
+ prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big
+
+ for x in dev; do
+ echo "============ compute perlexity for big lm ================="
+ get_data_prob.py ${dir}/data/text/${x}.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity'
+ done
+
+ format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > pocolm/lm.prune.${order}.2big.gz
+fi
diff --git a/egs/gale_arabic/s5d/local/process_xml.py b/egs/gale_arabic/s5d/local/process_xml.py
new file mode 100755
index 00000000000..3c6eed452ac
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/process_xml.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+from bs4 import BeautifulSoup
+import sys
+import argparse
+
+def get_args():
+ parser = argparse.ArgumentParser(description="""This script process xml file.""")
+ parser.add_argument("xml", type=str, help="""Input xml file""")
+ parser.add_argument("output", type=str, help="""output text file""")
+ args = parser.parse_args()
+ return args
+
+def process_xml(xml_handle, output_handle):
+ soup = BeautifulSoup(xml_handle, "xml")
+ for segment in soup.find_all("segment"):
+ who = segment["who"]
+ starttime = segment["starttime"]
+ endtime = segment["endtime"]
+ WMER = segment["WMER"]
+ text = " ".join([element.string for element in segment.find_all("element") if element.string != None])
+ output_handle.write("{} {} {} {} {}\n".format(who, starttime, endtime, WMER, text))
+ xml_handle.close()
+ output_handle.close()
+
+def main():
+ args = get_args()
+
+ xml_handle = open(args.xml, 'r')
+ output_handle = sys.stdout if args.output == '-' else open(args.output, 'w')
+
+ process_xml(xml_handle, output_handle)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/gale_arabic/s5d/local/reorder.py b/egs/gale_arabic/s5d/local/reorder.py
new file mode 100755
index 00000000000..4255b6b642f
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/reorder.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from pathlib import Path
+
+asr_result = sys.argv[1]
+mt_dir = sys.argv[2]
+file = sys.argv[3]
+output_dir = sys.argv[4]
+
+def get_asr_dict(file):
+ asr_dict = dict()
+ with open(file, 'r') as f:
+ for line in f.readlines():
+ line_split = line.split()
+ utt_id = line_split[0]
+ sentence = " ".join(line_split[1:])
+ assert(utt_id not in asr_dict)
+ asr_dict[utt_id] = sentence
+
+ return asr_dict
+
+def get_utt_id(line_list):
+ start_time = "".join(line_list[2].split('.'))
+ start_digit = len(line_list[2].split('.')[1])
+ start_dif = 3 - start_digit
+
+ end_time = "".join(line_list[3].split('.'))
+ end_digit = len(line_list[3].split('.')[1])
+ end_dif = 3 - end_digit
+
+ if start_time == '00':
+ start_time = '0'
+ else:
+ start_time += "0" * start_dif
+ end_time += "0" * end_dif
+
+ utt_id = line_list[0] + "_" + start_time + "_" + end_time
+ return utt_id
+
+def write_result(asr_dict, file, mt_dir, output_dir):
+ # create output directory
+ output_path = Path(output_dir)
+ output_path.mkdir(exist_ok=True)
+
+ # read each file
+ with open(file, 'r') as f:
+ for line in f.readlines():
+ file_name = line.split()[0]
+ output_file_name = file_name.split('.')[0]
+ with open(mt_dir+"/"+file_name, 'r') as input_file:
+ with open(output_dir+"/"+output_file_name+".txt", 'w') as output_file:
+ for line in input_file.readlines():
+ line_split = line.split()
+ utt_id = get_utt_id(line_split)
+ if utt_id not in asr_dict:
+ print(utt_id)
+ else:
+ output_file.write(utt_id + " " + asr_dict[utt_id] + "\n")
+
+def main():
+ asr_dict = get_asr_dict(asr_result)
+ write_result(asr_dict, file, mt_dir, output_dir)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/gale_arabic/s5d/local/reverse.sh b/egs/gale_arabic/s5d/local/reverse.sh
new file mode 100755
index 00000000000..c09371b15d6
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/reverse.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+import sys
+
+text = sys.argv[1]
+output = sys.argv[2]
+
+with open(text, 'r') as text:
+ with open(output, 'w') as output:
+ for line in text.readlines():
+ sentence = list()
+ line_list = line.split()
+ line_list.reverse()
+ for word in line_list:
+ sentence.append("".join(reversed(word)))
+ output.write(" ".join(sentence)+"\n")
diff --git a/egs/gale_arabic/s5d/local/rnnlm/run_tdnn_lstm.sh b/egs/gale_arabic/s5d/local/rnnlm/run_tdnn_lstm.sh
new file mode 100755
index 00000000000..18e6b784fef
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/rnnlm/run_tdnn_lstm.sh
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+
+# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
+# 2015 Guoguo Chen
+# 2017 Hainan Xu
+# 2017 Xiaohui Zhang
+
+# This script trains LMs on the swbd LM-training data.
+
+dir=exp/rnnlm_lstm_1e
+embedding_dim=1024
+lstm_rpd=256
+lstm_nrpd=256
+stage=-10
+train_stage=-10
+
+# variables for lattice rescoring
+run_lat_rescore=true
+run_nbest_rescore=false
+
+ac_model_dir=exp/chain/tdnn_1a_sp/
+decode_dir_suffix=rnnlm_1a
+ngram_order=4 # approximate the lattice-rescoring by limiting the max-ngram-order
+ # if it's set, it merges histories in the lattice if they share
+ # the same ngram history and this prevents the lattice from
+ # exploding exponentially
+pruned_rescore=true
+
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+text=data/train/text
+giga_text=giga/text.2000k
+lexicon=data/local/dict/lexiconp.txt
+text_dir=data/rnnlm/text_nosp_1a
+mkdir -p $dir/config
+set -e
+
+for f in $text $lexicon $giga_text; do
+ [ ! -f $f ] && \
+ echo "$0: expected file $f to exist;" && exit 1
+done
+
+if [ $stage -le 0 ]; then
+ mkdir -p $text_dir
+ echo -n >$text_dir/dev.txt
+ # hold out one in every 50 lines as dev data.
+ cat $text | cut -d ' ' -f2- | awk -v text_dir=$text_dir '{if(NR%50 == 0) { print >text_dir"/dev.txt"; } else {print;}}' >$text_dir/gale.txt
+ cp $giga_text > $text_dir/giga.txt
+fi
+
+if [ $stage -le 1 ]; then
+ cp data/lang/words.txt $dir/config/
+ n=`cat $dir/config/words.txt | wc -l`
+ echo " $n" >> $dir/config/words.txt
+
+ # words that are not present in words.txt but are in the training or dev data, will be
+ # mapped to during training.
+ echo "" >$dir/config/oov.txt
+
+ cat > $dir/config/data_weights.txt <" \
+ --data-weights-file=$dir/config/data_weights.txt \
+ $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt
+
+ # choose features
+ rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \
+ --use-constant-feature=true \
+ --special-words=',,' \
+ $dir/config/words.txt > $dir/config/features.txt
+
+ cat >$dir/config/xconfig < list_decode$$
+
+#split the test set per type:
+awk '{print $2}' $galeFolder/all.test | sort -u > $galeFolder/test_id$$
+
+# generate the report test set
+awk '{print $2}' $galeFolder/report | sort -u > $galeFolder/report_id$$
+comm -1 -2 $galeFolder/test_id$$ $galeFolder/report_id$$ > $galeFolder/report.test
+
+# generate the conversational test set
+awk '{print $2}' $galeFolder/conversational | sort -u > $galeFolder/conversational_id$$
+
+comm -1 -2 $galeFolder/test_id$$ $galeFolder/conversational_id$$ > $galeFolder/conversational.test
+
+rm -fr $galeFolder/test_id$$ $galeFolder/report_id$$ $galeFolder/conversational_id$$
+
+min_lmwt=7
+max_lmwt=20
+cat list_decode$$ | while read dir; do
+ for type in report conversational; do
+ #echo "Processing: $dir $type"
+ rm -fr $dir/scoring_$type
+ cp -pr $dir/scoring $dir/scoring_$type
+ ( cd $dir/scoring_$type;
+ for x in *.tra test_filt.txt; do
+ sort -u $x > tmp$$
+ join tmp$$ $galeFolder/${type}.test > $x
+ rm -fr tmp$$
+ done
+ )
+
+utils/run.pl LMWT=$min_lmwt:$max_lmwt $dir/scoring_$type/log/score.LMWT.log \
+ cat $dir/scoring_${type}/LMWT.tra \| \
+ utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \
+ compute-wer --text --mode=present \
+ ark:$dir/scoring_${type}/test_filt.txt ark,p:- ">&" $dir/wer_${type}_LMWT
+done
+done
+
+
+time=$(date +"%Y-%m-%d-%H-%M-%S")
+echo "RESULTS generated by $USER at $time"
+
+echo "Report Results WER:"
+cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_report_* | utils/best_wer.sh; done | sort -n -k2
+
+echo "Conversational Results WER:"
+cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_conversational_* | utils/best_wer.sh; done | sort -n -k2
+
+echo "Combined Results for Reports and Conversational WER:"
+cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_?? $x/wer_?| utils/best_wer.sh; done | sort -n -k2
+
+rm list_decode$$
+
+
+
diff --git a/egs/gale_arabic/s5d/local/test/dev_all b/egs/gale_arabic/s5d/local/test/dev_all
new file mode 100644
index 00000000000..8b295b5602d
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/dev_all
@@ -0,0 +1,16 @@
+ALBAGHDADYA_BAGHDADYANEWS10_ARB_20080728_100000
+ALHURRA_NEWS10_ARB_20061005_102800
+ALJZ_NEWS15_ARB_20081210_095801
+ALURDUNYA_URDUNYANEWS_ARB_20070424_000000
+ARABIYA_PANORAMA_ARB_20090302_200000
+IRAQIYAH_ECONRPT_ARB_20081210_075801
+LBC_NEWS_ARB_20070322_195800
+SCOLA_EGYPNNSCO_ARB_20070426_035900
+YEMENTV_YEMENNEWS_ARB_20080728_130000
+ALHIWAR_FREEOPINION_ARB_20090519_180000
+ALHURRA_FREEHOUR_ARB_20080731_020000
+ALJZ_TODINTER_ARB_20070811_222800
+ARABIYA_ARABSDEBATE_ARB_20070830_210000
+ARABIYA_THIRDEYE_ARB_20070323_000000
+OMANTV_MORNCOFF_ARB_20070418_000000
+SYRIANTV_CIRCLEVT_ARB_20070813_142801
diff --git a/egs/gale_arabic/s5d/local/test/dev_bc b/egs/gale_arabic/s5d/local/test/dev_bc
new file mode 100644
index 00000000000..2a5c09250d6
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/dev_bc
@@ -0,0 +1,7 @@
+ALHIWAR_FREEOPINION_ARB_20090519_180000
+ALHURRA_FREEHOUR_ARB_20080731_020000
+ALJZ_TODINTER_ARB_20070811_222800
+ARABIYA_ARABSDEBATE_ARB_20070830_210000
+ARABIYA_THIRDEYE_ARB_20070323_000000
+OMANTV_MORNCOFF_ARB_20070418_000000
+SYRIANTV_CIRCLEVT_ARB_20070813_142801
diff --git a/egs/gale_arabic/s5d/local/test/dev_bn b/egs/gale_arabic/s5d/local/test/dev_bn
new file mode 100644
index 00000000000..d3aa6ca77be
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/dev_bn
@@ -0,0 +1,9 @@
+ALBAGHDADYA_BAGHDADYANEWS10_ARB_20080728_100000
+ALHURRA_NEWS10_ARB_20061005_102800
+ALJZ_NEWS15_ARB_20081210_095801
+ALURDUNYA_URDUNYANEWS_ARB_20070424_000000
+ARABIYA_PANORAMA_ARB_20090302_200000
+IRAQIYAH_ECONRPT_ARB_20081210_075801
+LBC_NEWS_ARB_20070322_195800
+SCOLA_EGYPNNSCO_ARB_20070426_035900
+YEMENTV_YEMENNEWS_ARB_20080728_130000
diff --git a/egs/gale_arabic/s5d/local/test/mt_eval_all b/egs/gale_arabic/s5d/local/test/mt_eval_all
new file mode 100644
index 00000000000..5a425ceb382
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/mt_eval_all
@@ -0,0 +1,22 @@
+ALAM_IRAQNOW_ARB_20070208_085800
+ALJZ_TODINTER_ARB_20070205_132800
+SYRIANTV_WEEKFILE_ARB_20070203_142800
+ALAM_WITHEVENT_ARB_20070206_205801
+ALAM_WITHEVENT_ARB_20070227_205800
+ARABIYA_ALARABIYANEWS2_ARB_20070312_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070322_000000
+ARABIYA_LATEHRNEWS_ARB_20070227_000000
+DUBAI_DUBAINEWS2_ARB_20070313_000000
+ABUDHABI_ABUDHNEWS2_ARB_20070228_000000
+ALURDUNYA_URDUNYANEWS_ARB_20070312_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070308_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070316_000000
+ARABIYA_LATEHRNEWS_ARB_20070222_000000
+ARABIYA_PANORAMA_ARB_20070226_000000
+ARABIYA_PANORAMA_ARB_20070306_000000
+ARABIYA_PANORAMA_ARB_20070311_000000
+DUBAI_DUBAINEWS2_ARB_20070227_000000
+DUBAI_DUBAINEWS2_ARB_20070306_000000
+DUBAI_DUBAINEWS2_ARB_20070312_000000
+ALURDUNYA_URDUNYANEWS_ARB_20070326_000000
+ARABIYA_PANORAMA_ARB_20070326_000000
diff --git a/egs/gale_arabic/s5d/local/test/mt_eval_bc b/egs/gale_arabic/s5d/local/test/mt_eval_bc
new file mode 100644
index 00000000000..423a77c543b
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/mt_eval_bc
@@ -0,0 +1,5 @@
+ALAM_IRAQNOW_ARB_20070208_085800
+ALJZ_TODINTER_ARB_20070205_132800
+SYRIANTV_WEEKFILE_ARB_20070203_142800
+ALAM_WITHEVENT_ARB_20070206_205801
+ALAM_WITHEVENT_ARB_20070227_205800
diff --git a/egs/gale_arabic/s5d/local/test/mt_eval_bn b/egs/gale_arabic/s5d/local/test/mt_eval_bn
new file mode 100644
index 00000000000..02542707633
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/mt_eval_bn
@@ -0,0 +1,17 @@
+ARABIYA_ALARABIYANEWS2_ARB_20070312_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070322_000000
+ARABIYA_LATEHRNEWS_ARB_20070227_000000
+DUBAI_DUBAINEWS2_ARB_20070313_000000
+ABUDHABI_ABUDHNEWS2_ARB_20070228_000000
+ALURDUNYA_URDUNYANEWS_ARB_20070312_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070308_000000
+ARABIYA_ALARABIYANEWS2_ARB_20070316_000000
+ARABIYA_LATEHRNEWS_ARB_20070222_000000
+ARABIYA_PANORAMA_ARB_20070226_000000
+ARABIYA_PANORAMA_ARB_20070306_000000
+ARABIYA_PANORAMA_ARB_20070311_000000
+DUBAI_DUBAINEWS2_ARB_20070227_000000
+DUBAI_DUBAINEWS2_ARB_20070306_000000
+DUBAI_DUBAINEWS2_ARB_20070312_000000
+ALURDUNYA_URDUNYANEWS_ARB_20070326_000000
+ARABIYA_PANORAMA_ARB_20070326_000000
diff --git a/egs/gale_arabic/s5d/local/test/test_p2 b/egs/gale_arabic/s5d/local/test/test_p2
new file mode 100644
index 00000000000..d82cf498804
--- /dev/null
+++ b/egs/gale_arabic/s5d/local/test/test_p2
@@ -0,0 +1,11 @@
+ALAM_WITHEVENT_ARB_20070116_205800
+ALAM_WITHEVENT_ARB_20070130_205800
+ALAM_WITHEVENT_ARB_20070206_205801
+ALAM_WITHEVENT_ARB_20070213_205800
+ALAM_WITHEVENT_ARB_20070227_205800
+ALAM_WITHEVENT_ARB_20070306_205800
+ALAM_WITHEVENT_ARB_20070313_205800
+ARABIYA_FROMIRAQ_ARB_20070216_175800
+ARABIYA_FROMIRAQ_ARB_20070223_175801
+ARABIYA_FROMIRAQ_ARB_20070302_175801
+ARABIYA_FROMIRAQ_ARB_20070309_175800
diff --git a/egs/gale_arabic/s5d/path.sh b/egs/gale_arabic/s5d/path.sh
new file mode 100755
index 00000000000..be11b34cbc6
--- /dev/null
+++ b/egs/gale_arabic/s5d/path.sh
@@ -0,0 +1,5 @@
+export KALDI_ROOT=$(pwd)/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/gale_arabic/s5d/rnnlm b/egs/gale_arabic/s5d/rnnlm
new file mode 120000
index 00000000000..e136939ba72
--- /dev/null
+++ b/egs/gale_arabic/s5d/rnnlm
@@ -0,0 +1 @@
+../../../scripts/rnnlm/
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/run.sh b/egs/gale_arabic/s5d/run.sh
new file mode 100755
index 00000000000..f8fdafe0a77
--- /dev/null
+++ b/egs/gale_arabic/s5d/run.sh
@@ -0,0 +1,163 @@
+#!/bin/bash -e
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# 2019 Dongji Gao
+# Apache 2.0
+
+# This is the recipe for GALE Arabic speech translation project.
+# It is similar to gale_arabic/s5b but with more training data.
+
+num_jobs=60
+num_decode_jobs=60
+decode_gmm=true
+stage=0
+overwrite=true
+
+# GALE Arabic phase 2 Conversation Speech
+dir1=/export/corpora/LDC/LDC2013S02/ # checked
+dir2=/export/corpora/LDC/LDC2013S07/ # checked (16k)
+text1=/export/corpora/LDC/LDC2013T04/ # checked
+text2=/export/corpora/LDC/LDC2013T17/ # checked
+# GALE Arabic phase 2 News Speech
+dir3=/export/corpora/LDC/LDC2014S07/ # checked (16k)
+dir4=/export/corpora/LDC/LDC2015S01/ # checked (16k)
+text3=/export/corpora/LDC/LDC2014T17/ # checked
+text4=/export/corpora/LDC/LDC2015T01/ # checked
+# GALE Arabic phase 3 Conversation Speech
+dir5=/export/corpora/LDC/LDC2015S11/ # checked (16k)
+dir6=/export/corpora/LDC/LDC2016S01/ # checked (16k)
+text5=/export/corpora/LDC/LDC2015T16/ # checked
+text6=/export/corpora/LDC/LDC2016T06/ # checked
+# GALE Arabic phase 3 News Speech
+dir7=/export/corpora/LDC/LDC2016S07/ # checked (16k)
+dir8=/export/corpora/LDC/LDC2017S02/ # checked (16k)
+text7=/export/corpora/LDC/LDC2016T17/ # checked
+text8=/export/corpora/LDC/LDC2017T04/ # checked
+# GALE Arabic phase 4 Conversation Speech
+dir9=/export/corpora/LDC/LDC2017S15/ # checked (16k)
+text9=/export/corpora/LDC/LDC2017T12/ # checked
+# GALE Arabic phase 4 News Speech
+dir10=/export/corpora/LDC/LDC2018S05/ # checked (16k)
+text10=/export/corpora/LDC/LDC2018T14/ # checked
+
+# Training: 941h Testing: 10.4h
+
+galeData=GALE
+mgb2_dir=""
+giga_dir=""
+
+# preference on how to process xml file (use xml binary or python)
+process_xml=""
+
+run_rnnlm=false
+. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
+ ## This relates to the queue.
+. ./path.sh
+. ./utils/parse_options.sh # e.g. this parses the above options
+ # if supplied.
+
+if [ $stage -le 0 ]; then
+
+ if [ -f data/train/text ] && ! $overwrite; then
+ echo "$0: Not processing, probably script have run from wrong stage"
+ echo "Exiting with status 1 to avoid data corruption"
+ exit 1;
+ fi
+
+ echo "$0: Preparing data..."
+
+ options=""
+ [ ! -z $mgb2_dir ] && options="--process-xml python --mgb2-dir $mgb2_dir"
+ local/prepare_data.sh $options
+
+ echo "$0: Preparing lexicon and LM..."
+ local/prepare_dict.sh
+
+ utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang
+
+ local/gale_train_lms.sh data/train/text data/local/dict/lexicon.txt data/local/lm $giga_dir # giga is Arabic Gigawords
+
+ utils/format_lm.sh data/lang data/local/lm/gale_giga.o4g.kn.gz \
+ data/local/dict/lexicon.txt data/lang_test
+fi
+
+mfccdir=mfcc
+if [ $stage -le 1 ]; then
+ echo "$0: Preparing the test and train feature files..."
+ for x in dev test_p2 mt_all train; do
+ utils/fix_data_dir.sh data/$x
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj $num_jobs \
+ data/$x exp/make_mfcc/$x $mfccdir
+ utils/fix_data_dir.sh data/$x # some files fail to get mfcc for many reasons
+ steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
+ done
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating sub-set and training monophone system"
+ utils/subset_data_dir.sh data/train 10000 data/train.10K || exit 1;
+
+ steps/train_mono.sh --nj 40 --cmd "$train_cmd" \
+ data/train.10K data/lang exp/mono || exit 1;
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Aligning data using monophone system"
+ steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \
+ data/train data/lang exp/mono exp/mono_ali || exit 1;
+
+ echo "$0: training triphone system with delta features"
+ steps/train_deltas.sh --cmd "$train_cmd" \
+ 2500 30000 data/train data/lang exp/mono_ali exp/tri1 || exit 1;
+fi
+
+if [ $stage -le 4 ] && $decode_gmm; then
+ utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph
+ steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \
+ exp/tri1/graph data/dev exp/tri1/decode
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: Aligning data and retraining and realigning with lda_mllt"
+ steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \
+ data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
+
+ steps/train_lda_mllt.sh --cmd "$train_cmd" 4000 50000 \
+ data/train data/lang exp/tri1_ali exp/tri2b || exit 1;
+fi
+
+if [ $stage -le 6 ] && $decode_gmm; then
+ utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph
+ steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \
+ exp/tri2b/graph data/dev exp/tri2b/decode
+fi
+
+if [ $stage -le 7 ]; then
+ echo "$0: Aligning data and retraining and realigning with sat_basis"
+ steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \
+ data/train data/lang exp/tri2b exp/tri2b_ali || exit 1;
+
+ steps/train_sat_basis.sh --cmd "$train_cmd" \
+ 5000 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1;
+
+ steps/align_fmllr.sh --nj $num_jobs --cmd "$train_cmd" \
+ data/train data/lang exp/tri3b exp/tri3b_ali || exit 1;
+fi
+
+if [ $stage -le 8 ] && $decode_gmm; then
+ utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph
+ steps/decode_fmllr.sh --nj $num_decode_jobs --cmd \
+ "$decode_cmd" exp/tri3b/graph data/dev exp/tri3b/decode
+fi
+
+if [ $stage -le 9 ]; then
+ echo "$0: Training a regular chain model using the e2e alignments..."
+ local/chain/run_tdnn.sh
+fi
+
+if [ $stage -le 10 ] && $run_rnnlm; then
+ local/rnnlm/run_tdnn_lstm.sh
+fi
+
+echo "$0: training succedded"
+exit 0
diff --git a/egs/gale_arabic/s5d/steps b/egs/gale_arabic/s5d/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/gale_arabic/s5d/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/gale_arabic/s5d/utils b/egs/gale_arabic/s5d/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/gale_arabic/s5d/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file
diff --git a/egs/gale_mandarin/s5/local/gale_data_prep_audio.sh b/egs/gale_mandarin/s5/local/gale_data_prep_audio.sh
index 21f325b9b84..0ea6cfcf9f9 100755
--- a/egs/gale_mandarin/s5/local/gale_data_prep_audio.sh
+++ b/egs/gale_mandarin/s5/local/gale_data_prep_audio.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Copyright 2016 Johns Hopkins Univeersity (author: Jan "Yenda" Trmal)
diff --git a/egs/gale_mandarin/s5/local/gale_data_prep_split.sh b/egs/gale_mandarin/s5/local/gale_data_prep_split.sh
index f7ca324355d..23774cd299b 100755
--- a/egs/gale_mandarin/s5/local/gale_data_prep_split.sh
+++ b/egs/gale_mandarin/s5/local/gale_data_prep_split.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 (author: Ahmed Ali, Hainan Xu)
# Copyright 2016 Johns Hopkins Univeersity (author: Jan "Yenda" Trmal)
diff --git a/egs/gale_mandarin/s5/local/gale_data_prep_txt.sh b/egs/gale_mandarin/s5/local/gale_data_prep_txt.sh
index d9b82902f0d..8404529e85a 100755
--- a/egs/gale_mandarin/s5/local/gale_data_prep_txt.sh
+++ b/egs/gale_mandarin/s5/local/gale_data_prep_txt.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 (author: Ahmed Ali, Hainan Xu)
# Copyright 2016 Johns Hopkins Univeersity (author: Jan "Yenda" Trmal)
diff --git a/egs/gale_mandarin/s5/local/gale_format_data.sh b/egs/gale_mandarin/s5/local/gale_format_data.sh
index fcd04e572e8..4ecac74d340 100755
--- a/egs/gale_mandarin/s5/local/gale_format_data.sh
+++ b/egs/gale_mandarin/s5/local/gale_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 QCRI (author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_mandarin/s5/local/gale_prep_dict.sh b/egs/gale_mandarin/s5/local/gale_prep_dict.sh
index c6a80240754..bc7c91b7fc7 100755
--- a/egs/gale_mandarin/s5/local/gale_prep_dict.sh
+++ b/egs/gale_mandarin/s5/local/gale_prep_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# prepare dictionary for HKUST
# it is done for English and Chinese separately,
# For English, we use CMU dictionary, and Sequitur G2P
@@ -41,7 +41,7 @@ cat $dict_dir/vocab-full.txt | grep -v '[a-zA-Z]' | \
if [ ! -f $dict_dir/cmudict/cmudict.0.7a ]; then
echo "--- Downloading CMU dictionary ..."
svn co http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/ $dict_dir/cmudict || \
- wget -e robots=off -r -np -nH --cut-dirs=4 -R index.html http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/ -P $dict_dir || exit 1
+ wget -c -e robots=off -r -np -nH --cut-dirs=4 -R index.html http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/ -P $dict_dir || exit 1
fi
if [ ! -f $dict_dir/cmudict/scripts/make_baseform.pl ] ; then
diff --git a/egs/gale_mandarin/s5/local/gale_train_lms.sh b/egs/gale_mandarin/s5/local/gale_train_lms.sh
index b70bf8de564..11573d06ffe 100755
--- a/egs/gale_mandarin/s5/local/gale_train_lms.sh
+++ b/egs/gale_mandarin/s5/local/gale_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/gale_mandarin/s5/local/nnet/run_dnn.sh b/egs/gale_mandarin/s5/local/nnet/run_dnn.sh
index 31159ae1754..79a20851a94 100755
--- a/egs/gale_mandarin/s5/local/nnet/run_dnn.sh
+++ b/egs/gale_mandarin/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 (author: Hainan Xu, Ahmed Ali)
# Apache 2.0
diff --git a/egs/gale_mandarin/s5/local/score_combine.sh b/egs/gale_mandarin/s5/local/score_combine.sh
index 65caab06ecc..c4d3c13886a 100755
--- a/egs/gale_mandarin/s5/local/score_combine.sh
+++ b/egs/gale_mandarin/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal
diff --git a/egs/gale_mandarin/s5/local/score_mbr.sh b/egs/gale_mandarin/s5/local/score_mbr.sh
index 04b84ccce5a..8c752368906 100755
--- a/egs/gale_mandarin/s5/local/score_mbr.sh
+++ b/egs/gale_mandarin/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/gale_mandarin/s5/local/split_wer_per_corpus.sh b/egs/gale_mandarin/s5/local/split_wer_per_corpus.sh
index baaf55e50a9..a438a142891 100755
--- a/egs/gale_mandarin/s5/local/split_wer_per_corpus.sh
+++ b/egs/gale_mandarin/s5/local/split_wer_per_corpus.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Report WER for reports and conversational
# Copyright 2014 QCRI (author: Ahmed Ali)
diff --git a/egs/gale_mandarin/s5/run.sh b/egs/gale_mandarin/s5/run.sh
index fe9fdbdd483..7cb89ab6b65 100755
--- a/egs/gale_mandarin/s5/run.sh
+++ b/egs/gale_mandarin/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 (author: Hainan Xu, Ahmed Ali)
# Apache 2.0
diff --git a/egs/gop/README.md b/egs/gop/README.md
new file mode 100644
index 00000000000..d95f4e966fd
--- /dev/null
+++ b/egs/gop/README.md
@@ -0,0 +1,98 @@
+There is a copy of this document on Google Docs, which renders the equations better:
+[link](https://docs.google.com/document/d/1pie-PU6u2NZZC_FzocBGGm6mpfBJMiCft9UoG0uA1kA/edit?usp=sharing)
+
+* * *
+
+# GOP on Kaldi
+
+The Goodness of Pronunciation (GOP) is a variation of the posterior probability, for phone level pronunciation scoring.
+GOP is widely used in pronunciation evaluation and mispronunciation detection tasks.
+
+This implementation is mainly based on the following paper:
+
+Hu, W., Qian, Y., Soong, F. K., & Wang, Y. (2015). Improved mispronunciation detection with deep neural network trained acoustic models and transfer learning based logistic regression classifiers. Speech Communication, 67(January), 154-166.
+
+## GOP-GMM
+
+In the conventional GMM-HMM based system, GOP was first proposed in (Witt et al., 2000). It was defined as the duration normalised log of the posterior:
+
+$$
+GOP(p)=\frac{1}{t_e-t_s+1} \log p(p|\mathbf o)
+$$
+
+where $\mathbf o$ is the input observations, $p$ is the canonical phone, $t_s, t_e$ are the start and end frame indexes.
+
+Assuming $p(q_i)\approx p(q_j)$ for any $q_i, q_j$, we have:
+
+$$
+\log p(p|\mathbf o)=\frac{p(\mathbf o|p)p(p)}{\sum_{q\in Q} p(\mathbf o|q)p(q)}
+ \approx\frac{p(\mathbf o|p)}{\sum_{q\in Q} p(\mathbf o|q)}
+$$
+
+where $Q$ is the whole phone set.
+
+The numerator of the equation is calculated from the forced alignment result and the denominator is calculated from a Viterbi decoding with an unconstrained phone loop.
+
+We do not implement GOP-GMM for Kaldi, as GOP-NN performs much better than GOP-GMM.
+
+## GOP-NN
+
+The definition of GOP-NN is a bit different from the GOP-GMM. GOP-NN was defined as the log phone posterior ratio between the canonical phone and the one with the highest score (Hu et al., 2015).
+
+Firstly we define Log Phone Posterior (LPP):
+
+$$
+LPP(p)=\log p(p|\mathbf o; t_s,t_e)
+$$
+
+Then we define the GOP-NN using LPP:
+
+$$
+GOP(p)=\log \frac{LPP(p)}{\max_{q\in Q} LPP(q)}
+$$
+
+LPP could be calculated as:
+
+$$
+LPP(p) \approx \frac{1}{t_e-t_s+1} \sum_{t=t_s}^{t_e}\log p(p|o_t)
+$$
+
+$$
+p(p|o_t) = \sum_{s \in p} p(s|o_t)
+$$
+
+where $s$ is the senone label, $\{s|s \in p\}$ is the states belonging to those triphones whose current phone is $p$.
+
+## Phone-level Feature
+
+Normally the classifier-based approach achieves better performance than the GOP-based approach.
+
+Different from GOP based method, an extra supervised training process is needed. The input features for supervised training are phone-level, segmental features. The phone-level feature is defined as:
+
+$$
+{[LPP(p_1),\cdots,LPP(p_M), LPR(p_1|p_i), \cdots, LPR(p_j|p_i),\cdots]}^T
+$$
+
+where the Log Posterior Ratio (LPR) between phone $p_j$ and $p_i$ is defined as:
+
+$$
+LPR(p_j|p_i) = \log p(p_j|\mathbf o; t_s, t_e) - \log p(p_i|\mathbf o; t_s, t_e)
+$$
+
+## Implementation
+
+This implementation consists of an executable binary `bin/compute-gop` and some scripts.
+
+`compute-gop` computes GOP and extracts phone-level features using nnet output probabilities.
+The output probabilities are assumed to be from a log-softmax layer.
+
+The script `run.sh` shows a typical pipeline based on librispeech's model and data.
+
+In Hu's paper, GOP was computed using a feed-forward DNN.
+We have tried to use the output-xent of a chain model to compute GOP, but the result was not good.
+We suspect the HMM topology of the chain model may not be well suited to GOP.
+
+The nnet3's TDNN (no chain) model performs well in GOP computing, so this recipe uses it.
+
+## Acknowledgement
+The author of this recipe would like to thank Xingyu Na for his works of model tuning and his helpful suggestions.
diff --git a/egs/gop/s5/cmd.sh b/egs/gop/s5/cmd.sh
new file mode 100644
index 00000000000..9139633e57a
--- /dev/null
+++ b/egs/gop/s5/cmd.sh
@@ -0,0 +1,13 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export cmd="run.pl"
diff --git a/egs/gop/s5/local/make_testcase.sh b/egs/gop/s5/local/make_testcase.sh
new file mode 100755
index 00000000000..93ed03e3653
--- /dev/null
+++ b/egs/gop/s5/local/make_testcase.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+src=$1
+dst=$2
+
+# Select a very small set for testing
+utils/subset_data_dir.sh --shortest $src 10 $dst
+
+# make fake transcripts as negative examples
+cp $dst/text $dst/text.ori
+sed -i "s/ THERE / THOSE /" $dst/text
+sed -i "s/ IN / ON /" $dst/text
diff --git a/egs/gop/s5/local/remove_phone_markers.pl b/egs/gop/s5/local/remove_phone_markers.pl
new file mode 100755
index 00000000000..16236a749cf
--- /dev/null
+++ b/egs/gop/s5/local/remove_phone_markers.pl
@@ -0,0 +1,72 @@
+#!/usr/bin/env perl
+# Copyright 2019 Junbo Zhang
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+my $Usage = <new phone mapping file, in which each line is: "old-integer-id new-integer-id.
+
+Usage: utils/remove_phone_markers.pl
+ e.g.: utils/remove_phone_markers.pl phones.txt phones-pure.txt phone-to-pure-phone.int
+EOU
+
+if (@ARGV < 3) {
+ die $Usage;
+}
+
+my $old_phone_symbols_filename = shift @ARGV;
+my $new_phone_symbols_filename = shift @ARGV;
+my $mapping_filename = shift @ARGV;
+
+my %id_of_old_phone;
+open(IN, $old_phone_symbols_filename) or die "Can't open $old_phone_symbols_filename";
+while () {
+ chomp;
+ my ($phone, $id) = split;
+ next if $phone =~ /\#/;
+ $id_of_old_phone{$phone} = $id;
+}
+close IN;
+
+my $new_id = 0;
+my %id_of_new_phone;
+my %id_old_to_new;
+foreach (sort { $id_of_old_phone{$a} <=> $id_of_old_phone{$b} } keys %id_of_old_phone) {
+ my $old_phone = $_;
+ s/_[BIES]//;
+ s/\d//;
+ my $new_phone = $_;
+ $id_of_new_phone{$new_phone} = $new_id++ if not exists $id_of_new_phone{$new_phone};
+ $id_old_to_new{$id_of_old_phone{$old_phone}} = $id_of_new_phone{$new_phone};
+}
+
+# Write to file
+open(OUT, ">$new_phone_symbols_filename") or die "Can\'t write to $new_phone_symbols_filename";
+foreach (sort { $id_of_new_phone{$a} <=> $id_of_new_phone{$b} } keys %id_of_new_phone) {
+ print OUT "$_\t$id_of_new_phone{$_}\n";
+}
+close OUT;
+
+open(OUT, ">$mapping_filename") or die "Can\'t write to $mapping_filename";
+foreach (sort { $a <=> $b } keys %id_old_to_new) {
+ next if $_ == 0;
+ print OUT "$_ $id_old_to_new{$_}\n";
+}
+close OUT;
diff --git a/egs/gop/s5/path.sh b/egs/gop/s5/path.sh
new file mode 100755
index 00000000000..03df6dd9f2b
--- /dev/null
+++ b/egs/gop/s5/path.sh
@@ -0,0 +1,27 @@
+export KALDI_ROOT=`pwd`/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
+
+# we use this both in the (optional) LM training and the G2P-related scripts
+PYTHON='python2.7'
+
+### Below are the paths used by the optional parts of the recipe
+
+# We only need the Festival stuff below for the optional text normalization(for LM-training) step
+FEST_ROOT=tools/festival
+NSW_PATH=${FEST_ROOT}/festival/bin:${FEST_ROOT}/nsw/bin
+export PATH=$PATH:$NSW_PATH
+
+# SRILM is needed for LM model building
+SRILM_ROOT=$KALDI_ROOT/tools/srilm
+SRILM_PATH=$SRILM_ROOT/bin:$SRILM_ROOT/bin/i686-m64
+export PATH=$PATH:$SRILM_PATH
+
+# Sequitur G2P executable
+sequitur=$KALDI_ROOT/tools/sequitur/g2p.py
+sequitur_path="$(dirname $sequitur)/lib/$PYTHON/site-packages"
+
+# Directory under which the LM training corpus should be extracted
+LM_CORPUS_ROOT=./lm-corpus
diff --git a/egs/gop/s5/run.sh b/egs/gop/s5/run.sh
new file mode 100755
index 00000000000..f9ae0d45672
--- /dev/null
+++ b/egs/gop/s5/run.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Junbo Zhang
+# Apache 2.0
+
+# This script shows how to calculate Goodness of Pronunciation (GOP) and
+# extract phone-level pronunciation feature for mispronunciations detection
+# tasks. Read ../README.md or the following paper for details:
+#
+# "Hu et al., Improved mispronunciation detection with deep neural network
+# trained acoustic models and transfer learning based logistic regression
+# classifiers, 2015."
+
+# You might not want to do this for interactive shells.
+set -e
+
+# Before running this recipe, you have to run the librispeech recipe firstly.
+# This script assumes the following paths exist.
+librispeech_eg=../../librispeech/s5
+model=$librispeech_eg/exp/nnet3_cleaned/tdnn_sp
+ivector=$librispeech_eg/exp/nnet3_cleaned/ivectors_test_clean_hires
+lang=$librispeech_eg/data/lang
+test_data=$librispeech_eg/data/test_clean_hires
+
+for d in $model $ivector $lang $test_data; do
+ [ ! -d $d ] && echo "$0: no such path $d" && exit 1;
+done
+
+# Global configurations
+stage=0
+nj=4
+
+data=test_10short
+dir=exp/gop_$data
+
+. ./cmd.sh
+. ./path.sh
+. parse_options.sh
+
+if [ $stage -le 0 ]; then
+ # Prepare test data
+ [ -d data ] || mkdir -p data/$data
+ local/make_testcase.sh $test_data data/$data
+fi
+
+if [ $stage -le 1 ]; then
+ # Compute Log-likelihoods
+ steps/nnet3/compute_output.sh --cmd "$cmd" --nj $nj \
+ --online-ivector-dir $ivector data/$data $model exp/probs_$data
+fi
+
+if [ $stage -le 2 ]; then
+ steps/nnet3/align.sh --cmd "$cmd" --nj $nj --use_gpu false \
+ --online_ivector_dir $ivector data/$data $lang $model $dir
+fi
+
+if [ $stage -le 3 ]; then
+ # make a map which converts phones to "pure-phones"
+ # "pure-phone" means the phone whose stress and pos-in-word markers are ignored
+ # eg. AE1_B --> AE, EH2_S --> EH, SIL --> SIL
+ local/remove_phone_markers.pl $lang/phones.txt $dir/phones-pure.txt \
+ $dir/phone-to-pure-phone.int
+
+ # Convert transition-id to pure-phone id
+ $cmd JOB=1:$nj $dir/log/ali_to_phones.JOB.log \
+ ali-to-phones --per-frame=true $model/final.mdl "ark,t:gunzip -c $dir/ali.JOB.gz|" \
+ "ark,t:-" \| utils/apply_map.pl -f 2- $dir/phone-to-pure-phone.int \| \
+ gzip -c \>$dir/ali-pure-phone.JOB.gz || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+ # The outputs of the binary compute-gop are the GOPs and the phone-level features.
+ #
+ # An example of the GOP result (extracted from "ark,t:$dir/gop.3.txt"):
+ # 4446-2273-0031 [ 1 0 ] [ 12 0 ] [ 27 -5.382001 ] [ 40 -13.91807 ] [ 1 -0.2555897 ] \
+ # [ 21 -0.2897284 ] [ 5 0 ] [ 31 0 ] [ 33 0 ] [ 3 -11.43557 ] [ 25 0 ] \
+ # [ 16 0 ] [ 30 -0.03224623 ] [ 5 0 ] [ 25 0 ] [ 33 0 ] [ 1 0 ]
+ # It is in the posterior format, where each pair stands for [pure-phone-index gop-value].
+ # For example, [ 27 -5.382001 ] means the GOP of the pure-phone 27 (it corresponds to the
+ # phone "OW", according to "$dir/phones-pure.txt") is -5.382001, indicating the audio
+ # segment of this phone should be a mispronunciation.
+ #
+ # The phone-level features are in matrix format:
+ # 4446-2273-0031 [ -0.2462088 -10.20292 -11.35369 ...
+ # -8.584108 -7.629755 -13.04877 ...
+ # ...
+ # ... ]
+ # The number of rows equals the number of phones in the utterance. In this case, it is 17.
+ # The column number is 2 * (pure-phone set size), as the feature consists of LLR + LPR.
+ # The phone-level features can be used to train a classifier with human labels. See Hu's
+ # paper for detail.
+ $cmd JOB=1:$nj $dir/log/compute_gop.JOB.log \
+ compute-gop --phone-map=$dir/phone-to-pure-phone.int $model/final.mdl \
+ "ark,t:gunzip -c $dir/ali-pure-phone.JOB.gz|" \
+ "ark:exp/probs_$data/output.JOB.ark" \
+ "ark,t:$dir/gop.JOB.txt" "ark,t:$dir/phonefeat.JOB.txt" || exit 1;
+ echo "Done compute-gop, the results: \"$dir/gop..txt\" in posterior format."
+
+ # We set -5 as a universal empirical threshold here. You can also determine multiple phone
+ # dependent thresholds based on the human-labeled mispronunciation data.
+ echo "The phones whose gop values less than -5 could be treated as mispronunciations."
+fi
diff --git a/egs/gop/s5/steps b/egs/gop/s5/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/gop/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/gop/s5/utils b/egs/gop/s5/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/gop/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/gp/s1/install.sh b/egs/gp/s1/install.sh
index 0222b098664..ab486ee1f97 100755
--- a/egs/gp/s1/install.sh
+++ b/egs/gp/s1/install.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
diff --git a/egs/gp/s1/local/gp_train_multi_ubm.sh b/egs/gp/s1/local/gp_train_multi_ubm.sh
index 9afc78ae7c0..f6d4abc19c8 100755
--- a/egs/gp/s1/local/gp_train_multi_ubm.sh
+++ b/egs/gp/s1/local/gp_train_multi_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/align_deltas.sh b/egs/gp/s1/steps/align_deltas.sh
index 22da04432c7..a9d25853fdf 100755
--- a/egs/gp/s1/steps/align_deltas.sh
+++ b/egs/gp/s1/steps/align_deltas.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation; Arnab Ghoshal
diff --git a/egs/gp/s1/steps/decode_deltas.sh b/egs/gp/s1/steps/decode_deltas.sh
index 98f9c8ed337..58cf9ae3873 100755
--- a/egs/gp/s1/steps/decode_deltas.sh
+++ b/egs/gp/s1/steps/decode_deltas.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/train_deltas.sh b/egs/gp/s1/steps/train_deltas.sh
index 0efe7b60379..975faa93bc6 100755
--- a/egs/gp/s1/steps/train_deltas.sh
+++ b/egs/gp/s1/steps/train_deltas.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/train_mono.sh b/egs/gp/s1/steps/train_mono.sh
index e82c14fcaf2..10970a5714b 100755
--- a/egs/gp/s1/steps/train_mono.sh
+++ b/egs/gp/s1/steps/train_mono.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/train_trees.sh b/egs/gp/s1/steps/train_trees.sh
index 9a3a51c8654..52511a0a5c6 100755
--- a/egs/gp/s1/steps/train_trees.sh
+++ b/egs/gp/s1/steps/train_trees.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/steps/train_ubm_deltas.sh b/egs/gp/s1/steps/train_ubm_deltas.sh
index 7a666ca7668..73617e2a1d6 100755
--- a/egs/gp/s1/steps/train_ubm_deltas.sh
+++ b/egs/gp/s1/steps/train_ubm_deltas.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
# Copyright 2010-2011 Microsoft Corporation
diff --git a/egs/gp/s1/utils/lmrescore.sh b/egs/gp/s1/utils/lmrescore.sh
index c911d0ce8b0..9e706395c4f 100755
--- a/egs/gp/s1/utils/lmrescore.sh
+++ b/egs/gp/s1/utils/lmrescore.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation; Arnab Ghoshal
diff --git a/egs/gp/s1/utils/mkgraph.sh b/egs/gp/s1/utils/mkgraph.sh
index 3aba742832d..c9225a63b81 100755
--- a/egs/gp/s1/utils/mkgraph.sh
+++ b/egs/gp/s1/utils/mkgraph.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/score_lats.sh b/egs/gp/s1/utils/score_lats.sh
index 5aed89b5ef4..fadb3d635f5 100755
--- a/egs/gp/s1/utils/score_lats.sh
+++ b/egs/gp/s1/utils/score_lats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/score_sclite.sh b/egs/gp/s1/utils/score_sclite.sh
index 9e7426e84b7..49e2398095b 100755
--- a/egs/gp/s1/utils/score_sclite.sh
+++ b/egs/gp/s1/utils/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/score_text.sh b/egs/gp/s1/utils/score_text.sh
index cf485261bca..b71e0bc030e 100755
--- a/egs/gp/s1/utils/score_text.sh
+++ b/egs/gp/s1/utils/score_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
if [ -f ./path.sh ]; then . ./path.sh; fi
diff --git a/egs/gp/s1/utils/split_data.sh b/egs/gp/s1/utils/split_data.sh
index 19431aa5c6d..e8f5f048edb 100755
--- a/egs/gp/s1/utils/split_data.sh
+++ b/egs/gp/s1/utils/split_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s5/local/gp_install.sh b/egs/gp/s5/local/gp_install.sh
index 85d16bc5c21..6f65704e5f4 100755
--- a/egs/gp/s5/local/gp_install.sh
+++ b/egs/gp/s5/local/gp_install.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Arnab Ghoshal
diff --git a/egs/gp/s5/local/score.sh b/egs/gp/s5/local/score.sh
index d74868282f1..802be09ba9c 100755
--- a/egs/gp/s5/local/score.sh
+++ b/egs/gp/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/heroico/s5/local/chain/compare_wer.sh b/egs/heroico/s5/local/chain/compare_wer.sh
index 3ee755dee36..157e618927b 100755
--- a/egs/heroico/s5/local/chain/compare_wer.sh
+++ b/egs/heroico/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
index 361879b4142..715e7f398b7 100755
--- a/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
+++ b/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_cnn_tdnn_1a.sh is modified from run_tdnn_1b.sh but taking
# the xconfig from mini-librispeech's run_cnn_tdnn_1a54.sh; only
diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh
index 290bd4c7970..42391b37e5f 100755
--- a/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# local/chain/compare_wer.sh exp/chain/tdnn1a_sp
# ./local/chain/compare_wer.sh exp/chain/tdnn1a_sp
diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh
index cfb4dc1f697..b8f397baff8 100755
--- a/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1b is as 1a but a re-tuned model with quite a few changes, including moving to
# a resnet-style factored TDNN-F model.
diff --git a/egs/heroico/s5/local/get_wav_list.sh b/egs/heroico/s5/local/get_wav_list.sh
index 2d79079935b..419b449f2ab 100755
--- a/egs/heroico/s5/local/get_wav_list.sh
+++ b/egs/heroico/s5/local/get_wav_list.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 John Morgan
# Apache 2.0.
diff --git a/egs/heroico/s5/local/nnet3/run_ivector_common.sh b/egs/heroico/s5/local/nnet3/run_ivector_common.sh
index e882ce0c918..4d2014cd4fb 100755
--- a/egs/heroico/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/heroico/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/heroico/s5/local/prepare_data.sh b/egs/heroico/s5/local/prepare_data.sh
index b78d9f1d1cb..d5fcd782766 100755
--- a/egs/heroico/s5/local/prepare_data.sh
+++ b/egs/heroico/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 John Morgan
# Apache 2.0.
diff --git a/egs/heroico/s5/local/prepare_lm.sh b/egs/heroico/s5/local/prepare_lm.sh
index e2a92ba3c5a..7072b9b7088 100755
--- a/egs/heroico/s5/local/prepare_lm.sh
+++ b/egs/heroico/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 John Morgan
# Apache 2.0.
diff --git a/egs/heroico/s5/local/subs_download.sh b/egs/heroico/s5/local/subs_download.sh
index 98dcb42d4e0..ab236a8ecb5 100755
--- a/egs/heroico/s5/local/subs_download.sh
+++ b/egs/heroico/s5/local/subs_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 John Morgan
# Apache 2.0.
diff --git a/egs/heroico/s5/run.sh b/egs/heroico/s5/run.sh
index 4cc5617e985..c990468a9db 100755
--- a/egs/heroico/s5/run.sh
+++ b/egs/heroico/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/hkust/s5/local/chain/compare_wer.sh b/egs/hkust/s5/local/chain/compare_wer.sh
index 27a6b783433..364d40d4ec4 100755
--- a/egs/hkust/s5/local/chain/compare_wer.sh
+++ b/egs/hkust/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen Liu)
# compare wer between diff. models in hkust chain directory
diff --git a/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh
index c62b776de2b..85e2323688e 100755
--- a/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh
+++ b/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7p.sh in swbd chain recipe.
diff --git a/egs/hkust/s5/local/ext/score.sh b/egs/hkust/s5/local/ext/score.sh
index e4009abfe94..f6f5d5af61d 100755
--- a/egs/hkust/s5/local/ext/score.sh
+++ b/egs/hkust/s5/local/ext/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012-2013.
# Hong Kong University of Science and Technology (Ricky Chan) 2013. Apache 2.0.
diff --git a/egs/hkust/s5/local/ext/score_basic_ext.sh b/egs/hkust/s5/local/ext/score_basic_ext.sh
index c54c732edf1..1378cbd16c5 100755
--- a/egs/hkust/s5/local/ext/score_basic_ext.sh
+++ b/egs/hkust/s5/local/ext/score_basic_ext.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012-2013.
# Hong Kong University of Science and Technology (Author: Ricky Chan Ho Yin). Apache 2.0.
#
diff --git a/egs/hkust/s5/local/hkust_data_prep.sh b/egs/hkust/s5/local/hkust_data_prep.sh
index 6342ccfe861..7857f2c6722 100755
--- a/egs/hkust/s5/local/hkust_data_prep.sh
+++ b/egs/hkust/s5/local/hkust_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh || exit 1;
diff --git a/egs/hkust/s5/local/hkust_format_data.sh b/egs/hkust/s5/local/hkust_format_data.sh
index 7fc9b701f49..7396485e4bb 100755
--- a/egs/hkust/s5/local/hkust_format_data.sh
+++ b/egs/hkust/s5/local/hkust_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
if [ -f ./path.sh ]; then . ./path.sh; fi
diff --git a/egs/hkust/s5/local/hkust_prepare_dict.sh b/egs/hkust/s5/local/hkust_prepare_dict.sh
index 49f27f2f868..0f3f26efa53 100755
--- a/egs/hkust/s5/local/hkust_prepare_dict.sh
+++ b/egs/hkust/s5/local/hkust_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 LeSpeech (Author: Xingyu Na)
# prepare dictionary for HKUST
diff --git a/egs/hkust/s5/local/hkust_train_lms.sh b/egs/hkust/s5/local/hkust_train_lms.sh
index 8520bb26d2d..7d83ffd00fc 100755
--- a/egs/hkust/s5/local/hkust_train_lms.sh
+++ b/egs/hkust/s5/local/hkust_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/hkust/s5/local/nnet/run_cnn.sh b/egs/hkust/s5/local/nnet/run_cnn.sh
index e0b7e10df86..1d92bcb0cb5 100755
--- a/egs/hkust/s5/local/nnet/run_cnn.sh
+++ b/egs/hkust/s5/local/nnet/run_cnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
diff --git a/egs/hkust/s5/local/nnet/run_dnn.sh b/egs/hkust/s5/local/nnet/run_dnn.sh
index e7e229b1d2b..4cfa53a9cc7 100755
--- a/egs/hkust/s5/local/nnet/run_dnn.sh
+++ b/egs/hkust/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# 2014 Guoguo Chen
diff --git a/egs/hkust/s5/local/nnet/run_lstm.sh b/egs/hkust/s5/local/nnet/run_lstm.sh
index ec5d0e3a856..6a15f87c1d1 100755
--- a/egs/hkust/s5/local/nnet/run_lstm.sh
+++ b/egs/hkust/s5/local/nnet/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/hkust/s5/local/nnet2/run_5d.sh b/egs/hkust/s5/local/nnet2/run_5d.sh
index b97f5c8af99..d73c04e6112 100755
--- a/egs/hkust/s5/local/nnet2/run_5d.sh
+++ b/egs/hkust/s5/local/nnet2/run_5d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This runs on the full training set (with duplicates removed), with p-norm
diff --git a/egs/hkust/s5/local/nnet2/run_convnet.sh b/egs/hkust/s5/local/nnet2/run_convnet.sh
index 56b81c42a11..f21727643b8 100755
--- a/egs/hkust/s5/local/nnet2/run_convnet.sh
+++ b/egs/hkust/s5/local/nnet2/run_convnet.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 2015 Xingyu Na
# This script runs on the full training set, using ConvNet setup on top of
diff --git a/egs/hkust/s5/local/nnet3/compare_wer.sh b/egs/hkust/s5/local/nnet3/compare_wer.sh
index 252fab12e18..8216ed6b00e 100755
--- a/egs/hkust/s5/local/nnet3/compare_wer.sh
+++ b/egs/hkust/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Emotech LTD (Author: Xuechen Liu)
# compare wer between diff. models in hkust nnet3 directory
diff --git a/egs/hkust/s5/local/nnet3/run_ivector_common.sh b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
index de952e08904..e7c79cdf3bf 100755
--- a/egs/hkust/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is modified based on swbd/s5c/local/nnet3/run_ivector_common.sh
diff --git a/egs/hkust/s5/local/nnet3/run_lstm.sh b/egs/hkust/s5/local/nnet3/run_lstm.sh
index 2a470868298..279302759d9 100755
--- a/egs/hkust/s5/local/nnet3/run_lstm.sh
+++ b/egs/hkust/s5/local/nnet3/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is a basic lstm script
diff --git a/egs/hkust/s5/local/nnet3/tuning/run_tdnn_2a.sh b/egs/hkust/s5/local/nnet3/tuning/run_tdnn_2a.sh
index 1cdbbf3bb2c..7339ce595c6 100755
--- a/egs/hkust/s5/local/nnet3/tuning/run_tdnn_2a.sh
+++ b/egs/hkust/s5/local/nnet3/tuning/run_tdnn_2a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_tdnn_7h.sh in swbd chain recipe.
# exp 2a: change the step of making configs, using xconfig with
diff --git a/egs/hkust/s5/local/online/run_nnet2_common.sh b/egs/hkust/s5/local/online/run_nnet2_common.sh
index 185bca38d8f..737694e625a 100755
--- a/egs/hkust/s5/local/online/run_nnet2_common.sh
+++ b/egs/hkust/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
set -e
diff --git a/egs/hkust/s5/local/online/run_nnet2_ms.sh b/egs/hkust/s5/local/online/run_nnet2_ms.sh
index c3177e1136e..4c3f01de3fc 100755
--- a/egs/hkust/s5/local/online/run_nnet2_ms.sh
+++ b/egs/hkust/s5/local/online/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
set -e
diff --git a/egs/hkust/s5/local/run_discriminative.sh b/egs/hkust/s5/local/run_discriminative.sh
index f7c399c65dd..6d62c493cd0 100755
--- a/egs/hkust/s5/local/run_discriminative.sh
+++ b/egs/hkust/s5/local/run_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Alibaba Robotics Corp. (Author: Xingyu Na)
# Apache 2.0
diff --git a/egs/hkust/s5/local/run_sgmm.sh b/egs/hkust/s5/local/run_sgmm.sh
index 74c4811f0a3..58ce1b6dafa 100755
--- a/egs/hkust/s5/local/run_sgmm.sh
+++ b/egs/hkust/s5/local/run_sgmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Alibaba Robotics Corp. (Author: Xingyu Na)
# Apache2.0
diff --git a/egs/hkust/s5/local/score.sh b/egs/hkust/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/hkust/s5/local/score.sh
+++ b/egs/hkust/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/hkust/s5/run.sh b/egs/hkust/s5/run.sh
index 9b684d5a215..9347598464c 100755
--- a/egs/hkust/s5/run.sh
+++ b/egs/hkust/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Chao Weng
# 2016 Alibaba Robotics Corp. (Author: Xingyu Na)
diff --git a/egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh b/egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh
index ea4e5699ce3..da355ddfc19 100755
--- a/egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh
+++ b/egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# 2017 Vimal Manohar
# License: Apache 2.0
diff --git a/egs/hub4_english/s5/local/data_prep/prepare_1997_bn_data.sh b/egs/hub4_english/s5/local/data_prep/prepare_1997_bn_data.sh
index 5f049f7831c..865d6eb54d4 100755
--- a/egs/hub4_english/s5/local/data_prep/prepare_1997_bn_data.sh
+++ b/egs/hub4_english/s5/local/data_prep/prepare_1997_bn_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# 2017 Vimal Manohar
# License: Apache 2.0
diff --git a/egs/hub4_english/s5/local/format_lms.sh b/egs/hub4_english/s5/local/format_lms.sh
index 1d18209aa60..7afdf51757f 100755
--- a/egs/hub4_english/s5/local/format_lms.sh
+++ b/egs/hub4_english/s5/local/format_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2014 Nickolay V. Shmyrev
# Apache 2.0
diff --git a/egs/hub4_english/s5/local/prepare_dict.sh b/egs/hub4_english/s5/local/prepare_dict.sh
index 3f53ec6af74..acbbd6de364 100755
--- a/egs/hub4_english/s5/local/prepare_dict.sh
+++ b/egs/hub4_english/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2010-2012 Microsoft Corporation
# 2012-2014 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/hub4_english/s5/local/run_cleanup_segmentation.sh b/egs/hub4_english/s5/local/run_cleanup_segmentation.sh
index e91ec318650..3d3558d032c 100755
--- a/egs/hub4_english/s5/local/run_cleanup_segmentation.sh
+++ b/egs/hub4_english/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/hub4_english/s5/local/run_segmentation_wsj.sh b/egs/hub4_english/s5/local/run_segmentation_wsj.sh
index a321abe9a29..89598e7916d 100755
--- a/egs/hub4_english/s5/local/run_segmentation_wsj.sh
+++ b/egs/hub4_english/s5/local/run_segmentation_wsj.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-18 Vimal Manohar
# Apache 2.0
diff --git a/egs/hub4_english/s5/local/score_sclite.sh b/egs/hub4_english/s5/local/score_sclite.sh
index add014c2dcc..f8ec12e8339 100755
--- a/egs/hub4_english/s5/local/score_sclite.sh
+++ b/egs/hub4_english/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
# begin configuration section.
diff --git a/egs/hub4_english/s5/local/train_lm.sh b/egs/hub4_english/s5/local/train_lm.sh
index 4378a287d42..04f979af135 100755
--- a/egs/hub4_english/s5/local/train_lm.sh
+++ b/egs/hub4_english/s5/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# Apache 2.0
diff --git a/egs/hub4_english/s5/run.sh b/egs/hub4_english/s5/run.sh
index 5db61d4eb10..f9e6cf4ab2c 100755
--- a/egs/hub4_english/s5/run.sh
+++ b/egs/hub4_english/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# Apache 2.0.
diff --git a/egs/hub4_spanish/s5/local/chain/compare_wer.sh b/egs/hub4_spanish/s5/local/chain/compare_wer.sh
index 0194b86ac69..72bb3ab1dc9 100755
--- a/egs/hub4_spanish/s5/local/chain/compare_wer.sh
+++ b/egs/hub4_spanish/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
index d1b657a2d74..b18d8ec0aaa 100755
--- a/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
+++ b/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
## This is taken from mini_librispeech.
diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh
index 40bbbe1ae79..0c13f46ffa6 100755
--- a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
## This is taken from mini_librispeech, but the proportional-shrink value was
# tuned for this corpus (hub4-spanish)
diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh
index a498d8157f3..159f3134fb4 100755
--- a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
## This is taken from mini_librispeech
diff --git a/egs/hub4_spanish/s5/local/nnet3/run_ivector_common.sh b/egs/hub4_spanish/s5/local/nnet3/run_ivector_common.sh
index 623e0e7e02f..14387b7a30c 100755
--- a/egs/hub4_spanish/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/hub4_spanish/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/hub4_spanish/s5/local/prepare_data.sh b/egs/hub4_spanish/s5/local/prepare_data.sh
index cadefe560a8..6a8e8a60213 100755
--- a/egs/hub4_spanish/s5/local/prepare_data.sh
+++ b/egs/hub4_spanish/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/hub4_spanish/s5/local/prepare_lexicon.sh b/egs/hub4_spanish/s5/local/prepare_lexicon.sh
index 1460e60e558..c54bdbc92a3 100755
--- a/egs/hub4_spanish/s5/local/prepare_lexicon.sh
+++ b/egs/hub4_spanish/s5/local/prepare_lexicon.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/hub4_spanish/s5/local/reestimate_langp.sh b/egs/hub4_spanish/s5/local/reestimate_langp.sh
index 62fc4c24a16..3380152aa3b 100755
--- a/egs/hub4_spanish/s5/local/reestimate_langp.sh
+++ b/egs/hub4_spanish/s5/local/reestimate_langp.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1a.sh b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1a.sh
index 3055fe95928..255872e1b33 100755
--- a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1a.sh
+++ b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1b.sh b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1b.sh
index 5835de9d373..4577f966574 100755
--- a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1b.sh
+++ b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_tdnn.sh b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_tdnn.sh
index 5dc840d9b03..6d0fbdead34 100755
--- a/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_tdnn.sh
+++ b/egs/hub4_spanish/s5/local/rnnlm/tuning/run_lstm_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/hub4_spanish/s5/local/run_sgmm2.sh b/egs/hub4_spanish/s5/local/run_sgmm2.sh
index eb23eddea2d..c0a515c26e3 100755
--- a/egs/hub4_spanish/s5/local/run_sgmm2.sh
+++ b/egs/hub4_spanish/s5/local/run_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/hub4_spanish/s5/local/train_lms_srilm.sh b/egs/hub4_spanish/s5/local/train_lms_srilm.sh
index 12ce6d79490..29d60eecd60 100755
--- a/egs/hub4_spanish/s5/local/train_lms_srilm.sh
+++ b/egs/hub4_spanish/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017 Johns Hopkins University (Author: Yenda Trmal )
# Apache 2.0
diff --git a/egs/hub4_spanish/s5/run.sh b/egs/hub4_spanish/s5/run.sh
index a0420c99839..6857976567f 100755
--- a/egs/hub4_spanish/s5/run.sh
+++ b/egs/hub4_spanish/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/iam/v1/local/augment_data.sh b/egs/iam/v1/local/augment_data.sh
index 31e4a8217ca..e5179a77c72 100755
--- a/egs/iam/v1/local/augment_data.sh
+++ b/egs/iam/v1/local/augment_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/iam/v1/local/chain/compare_wer.sh b/egs/iam/v1/local/chain/compare_wer.sh
index 4a2cc29481c..ae575b29d4f 100755
--- a/egs/iam/v1/local/chain/compare_wer.sh
+++ b/egs/iam/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh
index ef1273f3961..1c766e270ad 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2017 Chun Chieh Chang
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh
index bbcc55aa2b0..7b334fdbdbc 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1a is as 1a except it uses chain alignments (using 1a system) instead of gmm alignments
# local/chain/compare_wer.sh exp/chain/cnn_chainali_1a
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh
index 401ffa14e19..840803f0249 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1b is as chainali_1a except it has 3 more cnn layers and 1 less tdnn layer.
# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh
index 17209b9204f..9423612f029 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1c is as chainali_1b except it uses l2-regularize
# local/chain/compare_wer.sh exp/chain/cnn_chainali_1c
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh
index 89a40ed2a13..a26882e99da 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1d is as chainali_1c except it uses unconstrained egs
# local/chain/compare_wer.sh exp/chain/cnn_chainali_1d
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
index 703d404159a..76493fcc0f1 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a
# System cnn_e2eali_1a_(dict_50k) cnn_e2eali_1a_(dict_50k + unk model)
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
index 905c4661477..af3ff312cc0 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1b is the same as e2eali_1a but uses unconstrained egs
# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1b
diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh
index 26b1aca0929..fa884390c26 100755
--- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh
+++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1c is the same as e2eali_1b but has more CNN layers, different filter size
# smaller lm-opts, minibatch, frams-per-iter, less epochs and more initial/finaljobs.
diff --git a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index 462ad0522de..35528afcad0 100755
--- a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/iam/v1/local/extract_features.sh b/egs/iam/v1/local/extract_features.sh
index 1741ad3f9b2..1479f21d127 100755
--- a/egs/iam/v1/local/extract_features.sh
+++ b/egs/iam/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/iam/v1/local/prepare_data.sh b/egs/iam/v1/local/prepare_data.sh
index dc07f07e318..59e619f196f 100755
--- a/egs/iam/v1/local/prepare_data.sh
+++ b/egs/iam/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/iam/v1/local/score.sh b/egs/iam/v1/local/score.sh
index d964d70206b..2e27f23ce29 100755
--- a/egs/iam/v1/local/score.sh
+++ b/egs/iam/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
diff --git a/egs/iam/v1/local/train_lm.sh b/egs/iam/v1/local/train_lm.sh
index 3e8c838efdb..57afa897743 100755
--- a/egs/iam/v1/local/train_lm.sh
+++ b/egs/iam/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/iam/v1/run.sh b/egs/iam/v1/run.sh
index 85811b6cb3d..9362c353346 100755
--- a/egs/iam/v1/run.sh
+++ b/egs/iam/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/iam/v1/run_end2end.sh b/egs/iam/v1/run_end2end.sh
index 0a8b014715f..2f85a9940a9 100755
--- a/egs/iam/v1/run_end2end.sh
+++ b/egs/iam/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
set -e
diff --git a/egs/iam/v2/local/augment_data.sh b/egs/iam/v2/local/augment_data.sh
index 31e4a8217ca..e5179a77c72 100755
--- a/egs/iam/v2/local/augment_data.sh
+++ b/egs/iam/v2/local/augment_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/iam/v2/local/chain/compare_wer.sh b/egs/iam/v2/local/chain/compare_wer.sh
index 2ce14e13694..a1b8fffe166 100755
--- a/egs/iam/v2/local/chain/compare_wer.sh
+++ b/egs/iam/v2/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh
index 9a01688ba35..07188c7186e 100755
--- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1a is the same as chainali_1c but uses the e2e chain model to get the
# lattice alignments and to build a tree
diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh
index 28aa246f334..3db893e7a65 100755
--- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh
+++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1b is the same as e2eali_1a but uses unconstrained egs
diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh
index f158317950a..1ceec555b7a 100755
--- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh
+++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1c is the same as e2eali_1b but has fewer CNN layers, smaller
# l2-regularize, more epochs and uses dropout.
diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh
index 1c44057454a..9575b55c3c6 100755
--- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh
+++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1d is the same as e2eali_1c but has more CNN layers, different filter size
# smaller lm-opts, minibatch, frams-per-iter, less epochs and more initial/finaljobs.
diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh
index cb2bfa0a82d..cf7f3051654 100755
--- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh
index d5f79602695..72703583954 100755
--- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh
+++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/iam/v2/local/extract_features.sh b/egs/iam/v2/local/extract_features.sh
index 1741ad3f9b2..1479f21d127 100755
--- a/egs/iam/v2/local/extract_features.sh
+++ b/egs/iam/v2/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/iam/v2/local/prepare_data.sh b/egs/iam/v2/local/prepare_data.sh
index cf729d9a939..97ceb618b1a 100755
--- a/egs/iam/v2/local/prepare_data.sh
+++ b/egs/iam/v2/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/iam/v2/local/score.sh b/egs/iam/v2/local/score.sh
index 1d84815fc69..6168f38a929 100755
--- a/egs/iam/v2/local/score.sh
+++ b/egs/iam/v2/local/score.sh
@@ -1,5 +1,5 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/iam/v2/local/train_lm.sh b/egs/iam/v2/local/train_lm.sh
index cc0119eb748..a63ec51d874 100755
--- a/egs/iam/v2/local/train_lm.sh
+++ b/egs/iam/v2/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh
index c515c85fc72..1b0538f2560 100755
--- a/egs/iam/v2/run_end2end.sh
+++ b/egs/iam/v2/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
set -e
diff --git a/egs/iban/s5/local/arpa2G.sh b/egs/iban/s5/local/arpa2G.sh
index dddd7eb9097..97a139db0ec 100755
--- a/egs/iban/s5/local/arpa2G.sh
+++ b/egs/iban/s5/local/arpa2G.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh
index 10650a18269..23bacb2cc23 100755
--- a/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh
index db62e6f8a55..338fc090e07 100755
--- a/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/iban/s5/local/nnet3/run_ivector_common.sh b/egs/iban/s5/local/nnet3/run_ivector_common.sh
index b909ed04cde..ddec4419a61 100755
--- a/egs/iban/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/iban/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/iban/s5/local/nnet3/tuning/run_tdnn_1a.sh b/egs/iban/s5/local/nnet3/tuning/run_tdnn_1a.sh
index f53c5443ba8..88ff628e3df 100755
--- a/egs/iban/s5/local/nnet3/tuning/run_tdnn_1a.sh
+++ b/egs/iban/s5/local/nnet3/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# cat exp/nnet3/tdnn_1a/decode_dev/scoring_kaldi/best_wer
# %WER 17.34 [ 1908 / 11006, 257 ins, 303 del, 1348 sub ] exp/nnet3/tdnn_1a/decode_dev/wer_12_0.0
diff --git a/egs/iban/s5/local/prepare_data.sh b/egs/iban/s5/local/prepare_data.sh
index 8afffb56898..cb1077054d3 100755
--- a/egs/iban/s5/local/prepare_data.sh
+++ b/egs/iban/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/iban/s5/local/prepare_dict.sh b/egs/iban/s5/local/prepare_dict.sh
index ebec12bc171..6f87e1c1fe4 100755
--- a/egs/iban/s5/local/prepare_dict.sh
+++ b/egs/iban/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/iban/s5/local/prepare_lm.sh b/egs/iban/s5/local/prepare_lm.sh
index 10d5e276aa3..2c3a1aee1da 100755
--- a/egs/iban/s5/local/prepare_lm.sh
+++ b/egs/iban/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
diff --git a/egs/iban/s5/local/train_lms_srilm.sh b/egs/iban/s5/local/train_lms_srilm.sh
index f72596e750a..ecc88611554 100755
--- a/egs/iban/s5/local/train_lms_srilm.sh
+++ b/egs/iban/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/iban/s5/run.sh b/egs/iban/s5/run.sh
index 278a8177c0e..2d08c0ad6fd 100755
--- a/egs/iban/s5/run.sh
+++ b/egs/iban/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Sarah Samson Juan
# Apache 2.0
diff --git a/egs/ifnenit/v1/local/chain/compare_wer.sh b/egs/ifnenit/v1/local/chain/compare_wer.sh
index ff2a766f9e2..2e7d657f330 100755
--- a/egs/ifnenit/v1/local/chain/compare_wer.sh
+++ b/egs/ifnenit/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/ifnenit/v1/local/chain/run_cnn_1a.sh b/egs/ifnenit/v1/local/chain/run_cnn_1a.sh
index b0ecd547741..135d74fa8c9 100755
--- a/egs/ifnenit/v1/local/chain/run_cnn_1a.sh
+++ b/egs/ifnenit/v1/local/chain/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# steps/info/chain_dir_info.pl exp/chainfsf4/cnn1a_1/
# exp/chainfsf4/cnn1a_1/: num-iters=21 nj=2..4 num-params=4.4M dim=40->380 combine=-0.033->-0.025 xent:train/valid[13,20,final]=(-1.07,-1.31,-0.560/-1.30,-1.70,-0.978) logprob:train/valid[13,20,final]=(-0.064,-0.119,-0.011/-0.115,-0.208,-0.096)
diff --git a/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh b/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh
index 7f3132d657e..4ca73450b77 100755
--- a/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh
+++ b/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1b is as chainali_1a except it has 3 more cnn layers and 1 less tdnn layer.
diff --git a/egs/ifnenit/v1/local/ienit_initialize.sh b/egs/ifnenit/v1/local/ienit_initialize.sh
index e9412eb715f..072d85388ba 100755
--- a/egs/ifnenit/v1/local/ienit_initialize.sh
+++ b/egs/ifnenit/v1/local/ienit_initialize.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is originally from qatip project (http://qatsdemo.cloudapp.net/qatip/demo/)
# of Qatar Computing Research Institute (http://qcri.qa/)
diff --git a/egs/ifnenit/v1/local/prepare_data.sh b/egs/ifnenit/v1/local/prepare_data.sh
index ee20822d557..99fce0b239d 100755
--- a/egs/ifnenit/v1/local/prepare_data.sh
+++ b/egs/ifnenit/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
# Creat text, utt2spk, spk2utt, images.scp, and feats.scp for test and train.
diff --git a/egs/ifnenit/v1/local/prepare_dict.sh b/egs/ifnenit/v1/local/prepare_dict.sh
index de0ee6a433c..38b461ba6ad 100755
--- a/egs/ifnenit/v1/local/prepare_dict.sh
+++ b/egs/ifnenit/v1/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/ifnenit/v1/path.sh b/egs/ifnenit/v1/path.sh
index 0d7641cd5c1..85a0ae8e0d0 100755
--- a/egs/ifnenit/v1/path.sh
+++ b/egs/ifnenit/v1/path.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# path to Kaldi's root directory
export KALDI_ROOT=`pwd`/../../..
diff --git a/egs/ifnenit/v1/run.sh b/egs/ifnenit/v1/run.sh
index 2df5a06161c..65cc7adf9a4 100755
--- a/egs/ifnenit/v1/run.sh
+++ b/egs/ifnenit/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
nj=8
diff --git a/egs/librispeech/s5/RESULTS b/egs/librispeech/s5/RESULTS
index b45271765bc..bf67de1d840 100644
--- a/egs/librispeech/s5/RESULTS
+++ b/egs/librispeech/s5/RESULTS
@@ -1,6 +1,6 @@
# In the results below, "tgsmall" is the pruned 3-gram LM, which is used for lattice generation.
# The following language models are then used for rescoring:
-# a) tgmed- slightly less pruned 3-gram LM
+# a) tgmed- slightly less pruned 3-gram LM
# b) tglarge- the full, non-pruned 3-gram LM
# c) fglarge- non-pruned 4-gram LM
#
@@ -337,7 +337,7 @@
%WER 4.39 [ 2387 / 54402, 377 ins, 199 del, 1811 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch3_dev_clean_tglarge/wer_14
%WER 5.36 [ 2918 / 54402, 328 ins, 338 del, 2252 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch3_dev_clean_tgmed/wer_17
%WER 6.08 [ 3305 / 54402, 369 ins, 396 del, 2540 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch3_dev_clean_tgsmall/wer_15
-%WER 4.40 [ 2395 / 54402, 375 ins, 200 del, 1820 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_clean_tglarge/wer_14
+%WER 4.40 [ 2395 / 54402, 375 ins, 200 del, 1820 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_clean_tglarge/wer_14
%WER 5.35 [ 2909 / 54402, 328 ins, 339 del, 2242 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_clean_tgmed/wer_17
%WER 6.05 [ 3291 / 54402, 384 ins, 381 del, 2526 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch4_dev_clean_tgsmall/wer_14
%WER 13.45 [ 6850 / 50948, 808 ins, 876 del, 5166 sub ] exp/nnet2_online/nnet_ms_a_smbr_0.000005/decode_epoch0_dev_other_tglarge/wer_15
@@ -423,7 +423,7 @@
%WER 17.64 [ 9231 / 52343, 764 ins, 1662 del, 6805 sub ] exp/nnet2_online/nnet_ms_a_online/decode_pp_test_other_tgsmall_utt_offline/wer_14
# Results with nnet3 tdnn
-# local/nnet3/run_tdnn.sh
+# local/nnet3/run_tdnn.sh (with old configs, now moved to local/nnet3/tuning/run_tdnn_1a.sh)
# (4 epoch training on speed-perturbed data)
# num_params=19.3M
%WER 4.43 [ 2410 / 54402, 306 ins, 278 del, 1826 sub ] exp/nnet3/tdnn_sp/decode_dev_clean_fglarge/wer_13_1.0
@@ -444,7 +444,7 @@
%WER 16.29 [ 8528 / 52343, 828 ins, 1320 del, 6380 sub ] exp/nnet3/tdnn_sp/decode_test_other_tgsmall/wer_14_0.0
# Results with nnet3 tdnn
-# local/nnet3/run_tdnn.sh
+# local/nnet3/run_tdnn.sh (with old configs, now moved to local/nnet3/tuning/run_tdnn_1a.sh)
# (4 epoch training on speed-perturbed and volumn-perturbed "cleaned" data)
# num_params=19.3M, average training time=68.8s per job(on Tesla K80), real-time factor=1.23161
# for x in exp/nnet3_cleaned/tdnn_sp/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done
@@ -465,7 +465,6 @@
%WER 14.78 [ 7737 / 52343, 807 ins, 1115 del, 5815 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tgmed/wer_15_0.0
%WER 16.28 [ 8521 / 52343, 843 ins, 1258 del, 6420 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tgsmall/wer_14_0.0
-
# Results with nnet3 tdnn+sMBR
# local/nnet3/run_tdnn_discriminative.sh
# a subset of the full list of results (using the acoustic model obtained at the end of the training):
diff --git a/egs/librispeech/s5/local/chain/compare_wer.sh b/egs/librispeech/s5/local/chain/compare_wer.sh
index ec205670b76..dd84279df30 100755
--- a/egs/librispeech/s5/local/chain/compare_wer.sh
+++ b/egs/librispeech/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/librispeech/s5/local/chain/run_chain_common.sh b/egs/librispeech/s5/local/chain/run_chain_common.sh
index da37e148441..710625cf489 100755
--- a/egs/librispeech/s5/local/chain/run_chain_common.sh
+++ b/egs/librispeech/s5/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
diff --git a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh
index 6bf3a139ad1..02214315263 100755
--- a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh
+++ b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "This script has not yet been tested, you would have to comment this statement if you want to run it. Please let us know if you see any issues" && exit 1;
@@ -95,8 +95,8 @@ if [ $frame_subsampling_factor -ne 1 ]; then
data_dirs=
for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
- steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \
- $x $train_data_dir exp/shift_hires mfcc_hires
+ utils/data/shift_feats.sh \
+ $x $train_data_dir ${train_data_dir}_fs$x
utils/fix_data_dir.sh ${train_data_dir}_fs$x
data_dirs="$data_dirs ${train_data_dir}_fs$x"
awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp
diff --git a/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
index db17a35be64..b995ff10b16 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is based on tdnn_1d_sp, but adding cnn as the front-end.
# The cnn-tdnn-f (tdnn_cnn_1a_sp) outperforms the tdnn-f (tdnn_1d_sp).
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
index 48d6ddb804f..d8eedcfad2b 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# run_tdnn_1b.sh's topo is similiar with run_tdnn_1a.sh but we used the xconfigs. Otherwise "frames_per_eg=150,140,100".
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
index 101fd6a4c15..9c7194a2041 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
## Adapted from swbd for librispeech by David van Leeuwen
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
index 865b10dea0c..cae7e0249a1 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
# 1d is as 1c but a recipe based on the newer, more compact configs, and with
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index 0e97e46194d..a25078fbbec 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the tdnn-lstmp based on the run_tdnn_lstm_1n.sh under Switchboard.
# training acoustic model and decoding:
diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh
index 0da813267fc..27f15581e67 100755
--- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is the tdnn-lstmp based on the run_tdnn_lstm_1a.sh under Librispeech but with larger model size.
# training acoustic model and decoding:
diff --git a/egs/librispeech/s5/local/data_prep.sh b/egs/librispeech/s5/local/data_prep.sh
index 20c5697d61f..c05c3f89109 100755
--- a/egs/librispeech/s5/local/data_prep.sh
+++ b/egs/librispeech/s5/local/data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# 2014 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/librispeech/s5/local/decode_example.sh b/egs/librispeech/s5/local/decode_example.sh
index 815bf17b9f7..3b1b93d90d9 100755
--- a/egs/librispeech/s5/local/decode_example.sh
+++ b/egs/librispeech/s5/local/decode_example.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Guoguo Chen
# Apache 2.0
diff --git a/egs/librispeech/s5/local/download_and_untar.sh b/egs/librispeech/s5/local/download_and_untar.sh
index 1bb6d909edc..5cf6adde8bc 100755
--- a/egs/librispeech/s5/local/download_and_untar.sh
+++ b/egs/librispeech/s5/local/download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# Apache 2.0
diff --git a/egs/librispeech/s5/local/download_lm.sh b/egs/librispeech/s5/local/download_lm.sh
index 382f313df7c..129ca1edbe3 100755
--- a/egs/librispeech/s5/local/download_lm.sh
+++ b/egs/librispeech/s5/local/download_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/format_data.sh b/egs/librispeech/s5/local/format_data.sh
index 71ef6f0e36c..7b5a0823b15 100755
--- a/egs/librispeech/s5/local/format_data.sh
+++ b/egs/librispeech/s5/local/format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/format_lms.sh b/egs/librispeech/s5/local/format_lms.sh
index b530f61d2d9..d1a18bada88 100755
--- a/egs/librispeech/s5/local/format_lms.sh
+++ b/egs/librispeech/s5/local/format_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/g2p.sh b/egs/librispeech/s5/local/g2p.sh
index 5bc934499d9..b3ffa8e19b1 100755
--- a/egs/librispeech/s5/local/g2p.sh
+++ b/egs/librispeech/s5/local/g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/g2p/train_g2p.sh b/egs/librispeech/s5/local/g2p/train_g2p.sh
index 635a382e575..216fc3b5dc0 100755
--- a/egs/librispeech/s5/local/g2p/train_g2p.sh
+++ b/egs/librispeech/s5/local/g2p/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/lm/install_festival.sh b/egs/librispeech/s5/local/lm/install_festival.sh
index d5fa72b6d45..01dacd29b71 100755
--- a/egs/librispeech/s5/local/lm/install_festival.sh
+++ b/egs/librispeech/s5/local/lm/install_festival.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./path.sh || exit 1
diff --git a/egs/librispeech/s5/local/lm/normalize_text.sh b/egs/librispeech/s5/local/lm/normalize_text.sh
index e4eed324735..50e58205382 100755
--- a/egs/librispeech/s5/local/lm/normalize_text.sh
+++ b/egs/librispeech/s5/local/lm/normalize_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
@@ -39,10 +39,10 @@ for b in $(cat $in_list); do
[[ -f "$in_file" ]] || { echo "WARNING: $in_file does not exists"; continue; }
out_file=$out_root/$id/$id.txt
mkdir -p $out_root/$id
- $PYTHON local/lm/python/pre_filter.py $in_file /dev/stdout |\
- $PYTHON local/lm/python/text_pre_process.py /dev/stdin /dev/stdout |\
+ python local/lm/python/pre_filter.py $in_file /dev/stdout |\
+ python local/lm/python/text_pre_process.py /dev/stdin /dev/stdout |\
nsw_expand -format opl /dev/stdin |\
- $PYTHON local/lm/python/text_post_process.py /dev/stdin $out_file /dev/null || exit 1
+ python local/lm/python/text_post_process.py /dev/stdin $out_file /dev/null || exit 1
processed=$((processed + 1))
echo "Processing of $id has finished at $(date '+%T %F') [$processed texts ready so far]"
done
diff --git a/egs/librispeech/s5/local/lm/train_lm.sh b/egs/librispeech/s5/local/lm/train_lm.sh
index 6e6ae5970fb..0af3bb0e28c 100755
--- a/egs/librispeech/s5/local/lm/train_lm.sh
+++ b/egs/librispeech/s5/local/lm/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/lookahead/run_lookahead.sh b/egs/librispeech/s5/local/lookahead/run_lookahead.sh
new file mode 100755
index 00000000000..acd123fa8d3
--- /dev/null
+++ b/egs/librispeech/s5/local/lookahead/run_lookahead.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+
+. ./path.sh
+
+# Example script for lookahead composition
+
+lm=tgmed
+am=exp/chain_cleaned/tdnn_1d_sp
+testset=test_clean
+
+# %WER 4.86 [ 2553 / 52576, 315 ins, 222 del, 2016 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead/wer_11_0.0
+# %WER 4.79 [ 2518 / 52576, 279 ins, 292 del, 1947 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_arpa/wer_11_0.0
+# %WER 4.82 [ 2532 / 52576, 286 ins, 290 del, 1956 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_arpa_fast/wer_11_0.0
+# %WER 4.86 [ 2553 / 52576, 314 ins, 222 del, 2017 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_base/wer_11_0.0
+# %WER 4.86 [ 2553 / 52576, 315 ins, 222 del, 2016 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_static/wer_11_0.0
+
+
+# Speed
+#
+# base 0.18 xRT
+# static 0.18 xRT
+# lookahead 0.29 xRT
+# arpa 0.35 xRT
+# arpa_fast 0.21 xRT
+
+# Graph size
+#
+# Base 476 Mb
+# Static 621 Mb
+# Lookahead 48 Mb HCL + 77 Mb Grammar
+# Lookahead + OpenGrm 48 Mb HCL + 42 Mb Grammar
+
+if [ ! -f "${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so" ]; then
+ echo "Missing ${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so"
+ echo "Make sure you compiled openfst with lookahead support. Run make in ${KALDI_ROOT}/tools after git pull."
+ exit 1
+fi
+if [ ! -f "${KALDI_ROOT}/tools/openfst/bin/ngramread" ]; then
+ echo "You appear to not have OpenGRM tools installed. Missing ${KALDI_ROOT}/tools/openfst/bin/ngramread"
+ echo "cd to $KALDI_ROOT/tools and run extras/install_opengrm.sh."
+ exit 1
+fi
+export LD_LIBRARY_PATH=${KALDI_ROOT}/tools/openfst/lib/fst
+
+# Baseline
+utils/format_lm.sh data/lang data/local/lm/lm_${lm}.arpa.gz \
+ data/local/dict/lexicon.txt data/lang_test_${lm}_base
+
+utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov \
+ data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead_base
+
+steps/nnet3/decode.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_base data/${testset}_hires ${am}/decode_${testset}_lookahead_base
+
+utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --remove-oov --compose-graph \
+ data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead
+
+# Decode with statically composed lookahead graph
+steps/nnet3/decode.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead_static
+
+# Decode with runtime composition
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead
+
+# Compile arpa graph
+utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --compose-graph \
+ data/lang_test_${lm}_base ${am} data/local/lm/lm_tgmed.arpa.gz ${am}/graph_${lm}_lookahead_arpa
+
+# Decode with runtime composition
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa
+
+# Decode with runtime composition and tuned beams
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --beam 12.0 --max-active 3000 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa_fast
diff --git a/egs/librispeech/s5/local/nnet2/run_5a_clean_100.sh b/egs/librispeech/s5/local/nnet2/run_5a_clean_100.sh
index b0b46859170..c84072995ac 100755
--- a/egs/librispeech/s5/local/nnet2/run_5a_clean_100.sh
+++ b/egs/librispeech/s5/local/nnet2/run_5a_clean_100.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is p-norm neural net training, with the "fast" script, on top of adapted
# 40-dimensional features.
diff --git a/egs/librispeech/s5/local/nnet2/run_5c.sh b/egs/librispeech/s5/local/nnet2/run_5c.sh
index fefca74b748..bce646615c7 100755
--- a/egs/librispeech/s5/local/nnet2/run_5c.sh
+++ b/egs/librispeech/s5/local/nnet2/run_5c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is neural net training on top of adapted 40-dimensional features.
#
diff --git a/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh b/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh
index 06be974d8ac..1e33e50681c 100755
--- a/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh
+++ b/egs/librispeech/s5/local/nnet2/run_6a_clean_460.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is p-norm neural net training, with the "fast" script, on top of adapted
# 40-dimensional features.
diff --git a/egs/librispeech/s5/local/nnet2/run_7a_960.sh b/egs/librispeech/s5/local/nnet2/run_7a_960.sh
index f05ff8d2542..0eb2af6434f 100755
--- a/egs/librispeech/s5/local/nnet2/run_7a_960.sh
+++ b/egs/librispeech/s5/local/nnet2/run_7a_960.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is p-norm neural net training, with the "fast" script, on top of adapted
# 40-dimensional features.
diff --git a/egs/librispeech/s5/local/nnet3/compare_wer.sh b/egs/librispeech/s5/local/nnet3/compare_wer.sh
new file mode 100755
index 00000000000..4a272839687
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/compare_wer.sh
@@ -0,0 +1,152 @@
+#!/usr/bin/env bash
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
+# For use with discriminatively trained systems you specify the epochs after a colon:
+# for instance,
+# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}
+
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: [--looped] [--online] [ ... ]"
+ echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
+ echo "or (with epoch numbers for discriminative training):"
+ echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
+ exit 1
+fi
+
+echo "# $0 $*"
+
+include_looped=false
+if [ "$1" == "--looped" ]; then
+ include_looped=true
+ shift
+fi
+include_online=false
+if [ "$1" == "--online" ]; then
+ include_online=true
+ shift
+fi
+
+
+used_epochs=false
+
+# this function set_names is used to separate the epoch-related parts of the name
+# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like:
+# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
+# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
+# If called with something like:
+# set_names exp/chain/tdnn_d_sp_smbr:3
+# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"
+
+
+set_names() {
+ if [ $# != 1 ]; then
+ echo "compare_wer_general.sh: internal error"
+ exit 1 # exit the program
+ fi
+ dirname=$(echo $1 | cut -d: -f1)
+ epoch=$(echo $1 | cut -s -d: -f2)
+ if [ -z $epoch ]; then
+ epoch_infix=""
+ else
+ used_epochs=true
+ epoch_infix=_epoch${epoch}
+ fi
+}
+
+
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+strings=(
+ "# WER on dev(fglarge) "
+ "# WER on dev(tglarge) "
+ "# WER on dev(tgmed) "
+ "# WER on dev(tgsmall) "
+ "# WER on dev_other(fglarge) "
+ "# WER on dev_other(tglarge) "
+ "# WER on dev_other(tgmed) "
+ "# WER on dev_other(tgsmall) "
+ "# WER on test(fglarge) "
+ "# WER on test(tglarge) "
+ "# WER on test(tgmed) "
+ "# WER on test(tgsmall) "
+ "# WER on test_other(fglarge) "
+ "# WER on test_other(tglarge) "
+ "# WER on test_other(tgmed) "
+ "# WER on test_other(tgsmall) ")
+
+for n in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
+ echo -n "${strings[$n]}"
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ decode_names=(dev_clean_fglarge dev_clean_tglarge dev_clean_tgmed dev_clean_tgsmall dev_other_fglarge dev_other_tglarge dev_other_tgmed dev_other_tgsmall test_clean_fglarge test_clean_tglarge test_clean_tgmed test_clean_tgsmall test_other_fglarge test_other_tglarge test_other_tgmed test_other_tgsmall)
+
+ wer=$(grep WER $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ if $include_looped; then
+ echo -n "# [looped:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(grep WER $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+ if $include_online; then
+ echo -n "# [online:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(grep WER ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+done
+
+
+if $used_epochs; then
+ exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi
+
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep -v likelihood | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep -v likelihood | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (logLL) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep -w likelihood | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (logLL) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep -w likelihood | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Num-parameters "
+for x in $*; do
+ num_params=$(grep num-parameters $x/log/progress.1.log | awk '{print $2}')
+ printf "% 10d" $num_params
+done
+echo
diff --git a/egs/librispeech/s5/local/nnet3/run_ivector_common.sh b/egs/librispeech/s5/local/nnet3/run_ivector_common.sh
index b937232eb8d..1e1d4aef901 100755
--- a/egs/librispeech/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/librispeech/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn.sh b/egs/librispeech/s5/local/nnet3/run_tdnn.sh
deleted file mode 100755
index 28ee2b92004..00000000000
--- a/egs/librispeech/s5/local/nnet3/run_tdnn.sh
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/bin/bash
-
-# this is the standard "tdnn" system, built in nnet3; it's what we use to
-# call multi-splice.
-
-# without cleanup:
-# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &
-
-
-# At this script level we don't support not running on GPU, as it would be painfully slow.
-# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
-# --num-threads 16 and --minibatch-size 128.
-
-# First the options that are passed through to run_ivector_common.sh
-# (some of which are also used in this script directly).
-stage=0
-decode_nj=30
-train_set=train_960_cleaned
-gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it
- # should have alignments for the specified training data.
-nnet3_affix=_cleaned
-
-# Options which are not passed through to run_ivector_common.sh
-affix=
-train_stage=-10
-common_egs_dir=
-reporting_email=
-remove_egs=true
-
-. ./cmd.sh
-. ./path.sh
-. ./utils/parse_options.sh
-
-
-if ! cuda-compiled; then
- cat </dev/null || true
- for test in test_clean test_other dev_clean dev_other; do
- (
- steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
- --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
- ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
- steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
- data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
- steps/lmrescore_const_arpa.sh \
- --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
- data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
- steps/lmrescore_const_arpa.sh \
- --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
- data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
- ) || touch $dir/.error &
- done
- wait
- [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
-fi
-
-exit 0;
diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn.sh b/egs/librispeech/s5/local/nnet3/run_tdnn.sh
new file mode 120000
index 00000000000..61f8f499182
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1b.sh
\ No newline at end of file
diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh
index f283d69e7fe..d4ecd6ea0f0 100755
--- a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh
+++ b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo "This script has not yet been tested, you would have to comment this statement if you want to run it. Please let us know if you see any issues" && exit 1;
diff --git a/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1a.sh b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..e4c98ccef12
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1a.sh
@@ -0,0 +1,127 @@
+#!/usr/bin/env bash
+
+# this is the standard "tdnn" system, built in nnet3; it's what we use to
+# call multi-splice.
+
+# without cleanup:
+# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &
+
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=30
+train_set=train_960_cleaned
+gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it
+ # should have alignments for the specified training data.
+nnet3_affix=_cleaned
+
+# Options which are not passed through to run_ivector_common.sh
+affix=
+train_stage=-10
+common_egs_dir=
+reporting_email=
+remove_egs=true
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat </dev/null || true
+ for test in test_clean test_other dev_clean dev_other; do
+ (
+ steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
+ ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
+ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+exit 0;
diff --git a/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1b.sh b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1b.sh
new file mode 100755
index 00000000000..bb695d8bd79
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1b.sh
@@ -0,0 +1,163 @@
+#!/usr/bin/env bash
+
+# 1b is as 1a but uses xconfigs.
+
+# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_sp
+# System tdnn_sp
+# WER on dev(fglarge) 4.52
+# WER on dev(tglarge) 4.80
+# WER on dev(tgmed) 6.02
+# WER on dev(tgsmall) 6.80
+# WER on dev_other(fglarge) 12.54
+# WER on dev_other(tglarge) 13.16
+# WER on dev_other(tgmed) 15.51
+# WER on dev_other(tgsmall) 17.12
+# WER on test(fglarge) 5.00
+# WER on test(tglarge) 5.22
+# WER on test(tgmed) 6.40
+# WER on test(tgsmall) 7.14
+# WER on test_other(fglarge) 12.56
+# WER on test_other(tglarge) 13.04
+# WER on test_other(tgmed) 15.58
+# WER on test_other(tgsmall) 16.88
+# Final train prob 0.7180
+# Final valid prob 0.7003
+# Final train prob (logLL) -0.9483
+# Final valid prob (logLL) -0.9963
+# Num-parameters 19268504
+
+
+# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn_sp
+# exp/nnet3_cleaned/tdnn_sp/: num-iters=1088 nj=3..16 num-params=19.3M dim=40+100->5784 combine=-0.94->-0.93 (over 7) loglike:train/valid[723,1087,combined]=(-0.99,-0.95,-0.95/-1.02,-0.99,-1.00) accuracy:train/valid[723,1087,combined]=(0.710,0.721,0.718/0.69,0.70,0.700)
+
+# this is the standard "tdnn" system, built in nnet3; it's what we use to
+# call multi-splice.
+
+# without cleanup:
+# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &
+
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=30
+train_set=train_960_cleaned
+gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it
+ # should have alignments for the specified training data.
+nnet3_affix=_cleaned
+
+# Options which are not passed through to run_ivector_common.sh
+affix=
+train_stage=-10
+common_egs_dir=
+reporting_email=
+remove_egs=true
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-layer name=tdnn0 dim=1280
+ relu-batchnorm-layer name=tdnn1 dim=1280 input=Append(-1,2)
+ relu-batchnorm-layer name=tdnn2 dim=1280 input=Append(-3,3)
+ relu-batchnorm-layer name=tdnn3 dim=1280 input=Append(-7,2)
+ relu-batchnorm-layer name=tdnn4 dim=1280
+ output-layer name=output input=tdnn4 dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+ --config-dir $dir/configs || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_dnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.num-epochs 4 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 16 \
+ --trainer.optimization.initial-effective-lrate 0.0017 \
+ --trainer.optimization.final-effective-lrate 0.00017 \
+ --egs.dir "$common_egs_dir" \
+ --cleanup.remove-egs $remove_egs \
+ --cleanup.preserve-model-interval 100 \
+ --feat-dir=$train_data_dir \
+ --ali-dir $ali_dir \
+ --lang data/lang \
+ --reporting.email="$reporting_email" \
+ --dir=$dir || exit 1;
+
+fi
+
+if [ $stage -le 13 ]; then
+ # this does offline decoding that should give about the same results as the
+ # real online decoding (the one with --per-utt true)
+ rm $dir/.error 2>/dev/null || true
+ for test in test_clean test_other dev_clean dev_other; do
+ (
+ steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
+ ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
+ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+exit 0;
diff --git a/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1c.sh b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1c.sh
new file mode 100755
index 00000000000..2f1ceb7595e
--- /dev/null
+++ b/egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1c.sh
@@ -0,0 +1,177 @@
+#!/usr/bin/env bash
+
+# 1c is as 1b, but uses more modern TDNN configuration.
+
+# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_sp exp/nnet3_cleaned/tdnn_1c_sp
+# System tdnn_sp tdnn_1c_sp
+# WER on dev(fglarge) 4.52 4.20
+# WER on dev(tglarge) 4.80 4.37
+# WER on dev(tgmed) 6.02 5.31
+# WER on dev(tgsmall) 6.80 5.86
+# WER on dev_other(fglarge) 12.54 12.55
+# WER on dev_other(tglarge) 13.16 13.00
+# WER on dev_other(tgmed) 15.51 14.98
+# WER on dev_other(tgsmall) 17.12 15.88
+# WER on test(fglarge) 5.00 4.91
+# WER on test(tglarge) 5.22 4.99
+# WER on test(tgmed) 6.40 5.93
+# WER on test(tgsmall) 7.14 6.49
+# WER on test_other(fglarge) 12.56 12.94
+# WER on test_other(tglarge) 13.04 13.38
+# WER on test_other(tgmed) 15.58 15.11
+# WER on test_other(tgsmall) 16.88 16.28
+# Final train prob 0.7180 0.8509
+# Final valid prob 0.7003 0.8157
+# Final train prob (logLL) -0.9483 -0.4294
+# Final valid prob (logLL) -0.9963 -0.5662
+# Num-parameters 19268504 18391704
+
+# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn_1c_sp
+# exp/nnet3_cleaned/tdnn_1c_sp: num-iters=1088 nj=3..16 num-params=18.4M dim=40+100->5784 combine=-0.43->-0.43 (over 4) loglike:train/valid[723,1087,combined]=(-0.48,-0.43,-0.43/-0.58,-0.57,-0.57) accuracy:train/valid[723,1087,combined]=(0.840,0.854,0.851/0.811,0.816,0.816)
+
+# this is the standard "tdnn" system, built in nnet3; it's what we use to
+# call multi-splice.
+
+# without cleanup:
+# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &
+
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=30
+train_set=train_960_cleaned
+gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it
+ # should have alignments for the specified training data.
+nnet3_affix=_cleaned
+
+# Options which are not passed through to run_ivector_common.sh
+affix=
+train_stage=-10
+common_egs_dir=
+reporting_email=
+remove_egs=true
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat < $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ linear-component name=prefinal-l dim=256 $linear_opts
+
+ prefinal-layer name=prefinal input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output input=prefinal dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+ --config-dir $dir/configs || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_dnn.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.num-epochs 4 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 16 \
+ --trainer.optimization.initial-effective-lrate 0.0017 \
+ --trainer.optimization.final-effective-lrate 0.00017 \
+ --egs.dir "$common_egs_dir" \
+ --cleanup.remove-egs $remove_egs \
+ --cleanup.preserve-model-interval 100 \
+ --feat-dir=$train_data_dir \
+ --ali-dir $ali_dir \
+ --lang data/lang \
+ --reporting.email="$reporting_email" \
+ --dir=$dir || exit 1;
+
+fi
+
+if [ $stage -le 13 ]; then
+ # this does offline decoding that should give about the same results as the
+ # real online decoding (the one with --per-utt true)
+ rm $dir/.error 2>/dev/null || true
+ for test in test_clean test_other dev_clean dev_other; do
+ (
+ steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
+ ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
+ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
+ steps/lmrescore_const_arpa.sh \
+ --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+ data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+exit 0;
diff --git a/egs/librispeech/s5/local/online/run_nnet2.sh b/egs/librispeech/s5/local/online/run_nnet2.sh
index 5e0a616906f..b7dbe22a8d5 100755
--- a/egs/librispeech/s5/local/online/run_nnet2.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# example script for online-nnet2 system training and decoding,
# based on the one for fisher-English.
diff --git a/egs/librispeech/s5/local/online/run_nnet2_common.sh b/egs/librispeech/s5/local/online/run_nnet2_common.sh
index 1813233e7d0..9666e6b26b9 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_common.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script contains some common (shared) parts of the run_nnet*.sh scripts.
diff --git a/egs/librispeech/s5/local/online/run_nnet2_disc.sh b/egs/librispeech/s5/local/online/run_nnet2_disc.sh
index 65a661a21fe..108b1717871 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_disc.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script does discriminative training on top of the online,
diff --git a/egs/librispeech/s5/local/online/run_nnet2_ms.sh b/egs/librispeech/s5/local/online/run_nnet2_ms.sh
index c74de372fcc..7fa3f179fd6 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_ms.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the "multi-splice" version of the online-nnet2 training script.
# It's currently the best recipe.
diff --git a/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh b/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh
index f20d3230cc2..6c52db7831b 100755
--- a/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2_ms_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script does discriminative training on top of the online, multi-splice
diff --git a/egs/librispeech/s5/local/online_pitch/run_nnet2_common.sh b/egs/librispeech/s5/local/online_pitch/run_nnet2_common.sh
index c5fdd6033f6..32e03f922ae 100755
--- a/egs/librispeech/s5/local/online_pitch/run_nnet2_common.sh
+++ b/egs/librispeech/s5/local/online_pitch/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script contains some common (shared) parts of the run_nnet*.sh scripts.
diff --git a/egs/librispeech/s5/local/online_pitch/run_nnet2_ms.sh b/egs/librispeech/s5/local/online_pitch/run_nnet2_ms.sh
index ca7b6139e0b..3509e0d2eed 100755
--- a/egs/librispeech/s5/local/online_pitch/run_nnet2_ms.sh
+++ b/egs/librispeech/s5/local/online_pitch/run_nnet2_ms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the "multi-splice" version of the online-nnet2 training script,
# with pitch.
diff --git a/egs/librispeech/s5/local/prepare_dict.sh b/egs/librispeech/s5/local/prepare_dict.sh
index f9efb2ee46b..7b345b6bf1c 100755
--- a/egs/librispeech/s5/local/prepare_dict.sh
+++ b/egs/librispeech/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Vassil Panayotov
# Apache 2.0
diff --git a/egs/librispeech/s5/local/prepare_example_data.sh b/egs/librispeech/s5/local/prepare_example_data.sh
index 169aaea52a3..fc010dcc907 100755
--- a/egs/librispeech/s5/local/prepare_example_data.sh
+++ b/egs/librispeech/s5/local/prepare_example_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Guoguo Chen
# Apache 2.0
diff --git a/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh b/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh
index 137a972f3d9..b0eace29773 100755
--- a/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
# 2018 Ke Li
diff --git a/egs/librispeech/s5/local/run_cleanup_segmentation.sh b/egs/librispeech/s5/local/run_cleanup_segmentation.sh
index c67d76a0096..e88b4e44619 100755
--- a/egs/librispeech/s5/local/run_cleanup_segmentation.sh
+++ b/egs/librispeech/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Yiming Wang
diff --git a/egs/librispeech/s5/local/run_data_cleaning.sh b/egs/librispeech/s5/local/run_data_cleaning.sh
index 3300ad4c4a1..f856ce1154e 100755
--- a/egs/librispeech/s5/local/run_data_cleaning.sh
+++ b/egs/librispeech/s5/local/run_data_cleaning.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script shows how you can do data-cleaning, and exclude data that has a
diff --git a/egs/librispeech/s5/local/run_nnet2.sh b/egs/librispeech/s5/local/run_nnet2.sh
index f816e41af6e..be50a1c8608 100755
--- a/egs/librispeech/s5/local/run_nnet2.sh
+++ b/egs/librispeech/s5/local/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
train_set="train-clean-100"
test_sets="dev-clean dev-other"
diff --git a/egs/librispeech/s5/local/run_nnet2_clean_100.sh b/egs/librispeech/s5/local/run_nnet2_clean_100.sh
index 091bf2ab237..d64f6861395 100755
--- a/egs/librispeech/s5/local/run_nnet2_clean_100.sh
+++ b/egs/librispeech/s5/local/run_nnet2_clean_100.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. utils/parse_options.sh
. ./cmd.sh
diff --git a/egs/librispeech/s5/local/run_nnet2_clean_460.sh b/egs/librispeech/s5/local/run_nnet2_clean_460.sh
index 67d40eab629..fb5ecfbea28 100755
--- a/egs/librispeech/s5/local/run_nnet2_clean_460.sh
+++ b/egs/librispeech/s5/local/run_nnet2_clean_460.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. utils/parse_options.sh
. ./cmd.sh
diff --git a/egs/librispeech/s5/local/run_rnnlm.sh b/egs/librispeech/s5/local/run_rnnlm.sh
index ebb1832040b..819a2fafd61 100755
--- a/egs/librispeech/s5/local/run_rnnlm.sh
+++ b/egs/librispeech/s5/local/run_rnnlm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Yandex (Author: Ilya Edrenkin)
# Apache 2.0
diff --git a/egs/librispeech/s5/local/score.sh b/egs/librispeech/s5/local/score.sh
index c812199fc98..cb5bbb7277b 100755
--- a/egs/librispeech/s5/local/score.sh
+++ b/egs/librispeech/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2014 Guoguo Chen
# Apache 2.0
diff --git a/egs/librispeech/s5/run.sh b/egs/librispeech/s5/run.sh
index f784a8972db..86907413503 100755
--- a/egs/librispeech/s5/run.sh
+++ b/egs/librispeech/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set this to somewhere where you want to put your data, or where
diff --git a/egs/lre/v1/lid/extract_ivectors.sh b/egs/lre/v1/lid/extract_ivectors.sh
index 4a5bf5650cc..a6fb4953332 100755
--- a/egs/lre/v1/lid/extract_ivectors.sh
+++ b/egs/lre/v1/lid/extract_ivectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre/v1/lid/get_vtln_warps.sh b/egs/lre/v1/lid/get_vtln_warps.sh
index 72a8fb33200..94f06bb6b80 100755
--- a/egs/lre/v1/lid/get_vtln_warps.sh
+++ b/egs/lre/v1/lid/get_vtln_warps.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Daniel Povey
# Apache 2.0
diff --git a/egs/lre/v1/lid/train_diag_ubm.sh b/egs/lre/v1/lid/train_diag_ubm.sh
index 8ba703073c0..52a947b7a39 100755
--- a/egs/lre/v1/lid/train_diag_ubm.sh
+++ b/egs/lre/v1/lid/train_diag_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2013 Daniel Povey
diff --git a/egs/lre/v1/lid/train_full_ubm.sh b/egs/lre/v1/lid/train_full_ubm.sh
index 4511d0985fa..7fc25e86667 100755
--- a/egs/lre/v1/lid/train_full_ubm.sh
+++ b/egs/lre/v1/lid/train_full_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre/v1/lid/train_ivector_extractor.sh b/egs/lre/v1/lid/train_ivector_extractor.sh
index eda607a9eff..17f87600892 100755
--- a/egs/lre/v1/lid/train_ivector_extractor.sh
+++ b/egs/lre/v1/lid/train_ivector_extractor.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre/v1/lid/train_lvtln_model.sh b/egs/lre/v1/lid/train_lvtln_model.sh
index 35e6968e4e6..015e80ea62c 100755
--- a/egs/lre/v1/lid/train_lvtln_model.sh
+++ b/egs/lre/v1/lid/train_lvtln_model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Daniel Povey
# Apache 2.0
diff --git a/egs/lre/v1/local/split_long_utts.sh b/egs/lre/v1/local/split_long_utts.sh
index 083954ea288..f8df872d77b 100755
--- a/egs/lre/v1/local/split_long_utts.sh
+++ b/egs/lre/v1/local/split_long_utts.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
max_utt_len=60 # 60 seconds.
stage=0
diff --git a/egs/lre/v1/run.sh b/egs/lre/v1/run.sh
index 9818a8aa5f7..6941196f94a 100755
--- a/egs/lre/v1/run.sh
+++ b/egs/lre/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder
# 2014 Daniel Povey
# Apache 2.0.
diff --git a/egs/lre/v1/run_logistic_regression.sh b/egs/lre/v1/run_logistic_regression.sh
index d39dc3353c2..d19d0d29638 100755
--- a/egs/lre/v1/run_logistic_regression.sh
+++ b/egs/lre/v1/run_logistic_regression.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder, Daniel Povey
# Apache 2.0.
#
diff --git a/egs/lre07/v1/lid/extract_ivectors.sh b/egs/lre07/v1/lid/extract_ivectors.sh
index b1d745dda32..0ed3bbd53d8 100755
--- a/egs/lre07/v1/lid/extract_ivectors.sh
+++ b/egs/lre07/v1/lid/extract_ivectors.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre07/v1/lid/extract_ivectors_dnn.sh b/egs/lre07/v1/lid/extract_ivectors_dnn.sh
index f2c3f2697d3..56b8d339dc0 100755
--- a/egs/lre07/v1/lid/extract_ivectors_dnn.sh
+++ b/egs/lre07/v1/lid/extract_ivectors_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014-2015 David Snyder
diff --git a/egs/lre07/v1/lid/get_vtln_warps.sh b/egs/lre07/v1/lid/get_vtln_warps.sh
index 72a8fb33200..94f06bb6b80 100755
--- a/egs/lre07/v1/lid/get_vtln_warps.sh
+++ b/egs/lre07/v1/lid/get_vtln_warps.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Daniel Povey
# Apache 2.0
diff --git a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh
index 45e32477cfc..334a88551a3 100755
--- a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh
+++ b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 David Snyder
# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2015 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/lre07/v1/lid/nnet2/get_egs2.sh b/egs/lre07/v1/lid/nnet2/get_egs2.sh
index 78f4b5a5101..3edf37e97d7 100755
--- a/egs/lre07/v1/lid/nnet2/get_egs2.sh
+++ b/egs/lre07/v1/lid/nnet2/get_egs2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey).
# 2015 David Snyder
diff --git a/egs/lre07/v1/lid/nnet2/get_lda.sh b/egs/lre07/v1/lid/nnet2/get_lda.sh
index a8b0a87fa22..a030abbda2b 100755
--- a/egs/lre07/v1/lid/nnet2/get_lda.sh
+++ b/egs/lre07/v1/lid/nnet2/get_lda.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey).
# 2015 David Snyder
diff --git a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh
index 533001934ab..48fd79556cb 100755
--- a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh
+++ b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey).
# 2013 Xiaohui Zhang
diff --git a/egs/lre07/v1/lid/run_logistic_regression.sh b/egs/lre07/v1/lid/run_logistic_regression.sh
index 9caeda2423f..3ce16e40297 100755
--- a/egs/lre07/v1/lid/run_logistic_regression.sh
+++ b/egs/lre07/v1/lid/run_logistic_regression.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder, Daniel Povey
# Apache 2.0.
#
diff --git a/egs/lre07/v1/lid/train_diag_ubm.sh b/egs/lre07/v1/lid/train_diag_ubm.sh
index a5e256818ce..3092f1c559b 100755
--- a/egs/lre07/v1/lid/train_diag_ubm.sh
+++ b/egs/lre07/v1/lid/train_diag_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2013 Daniel Povey
diff --git a/egs/lre07/v1/lid/train_full_ubm.sh b/egs/lre07/v1/lid/train_full_ubm.sh
index 4511d0985fa..7fc25e86667 100755
--- a/egs/lre07/v1/lid/train_full_ubm.sh
+++ b/egs/lre07/v1/lid/train_full_ubm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre07/v1/lid/train_ivector_extractor.sh b/egs/lre07/v1/lid/train_ivector_extractor.sh
index 55bd54bb275..6bdc23c08e2 100755
--- a/egs/lre07/v1/lid/train_ivector_extractor.sh
+++ b/egs/lre07/v1/lid/train_ivector_extractor.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014 David Snyder
diff --git a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh
index 72c7e486273..19e1315d7f0 100755
--- a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh
+++ b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Daniel Povey
# 2014-2015 David Snyder
diff --git a/egs/lre07/v1/lid/train_lvtln_model.sh b/egs/lre07/v1/lid/train_lvtln_model.sh
index 77dd8c4bb5a..531e18bc246 100755
--- a/egs/lre07/v1/lid/train_lvtln_model.sh
+++ b/egs/lre07/v1/lid/train_lvtln_model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Daniel Povey
# Apache 2.0
diff --git a/egs/lre07/v1/local/lre07_eval/lre07_eval.sh b/egs/lre07/v1/local/lre07_eval/lre07_eval.sh
index e12ddccece0..fe185a1c397 100755
--- a/egs/lre07/v1/local/lre07_eval/lre07_eval.sh
+++ b/egs/lre07/v1/local/lre07_eval/lre07_eval.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder
# Apache 2.0.
#
diff --git a/egs/lre07/v1/local/split_long_utts.sh b/egs/lre07/v1/local/split_long_utts.sh
index 083954ea288..f8df872d77b 100755
--- a/egs/lre07/v1/local/split_long_utts.sh
+++ b/egs/lre07/v1/local/split_long_utts.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
max_utt_len=60 # 60 seconds.
stage=0
diff --git a/egs/lre07/v1/run.sh b/egs/lre07/v1/run.sh
index ca9f3df41bb..984b1982c6a 100755
--- a/egs/lre07/v1/run.sh
+++ b/egs/lre07/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014-2015 David Snyder
# Daniel Povey
# Apache 2.0.
diff --git a/egs/lre07/v2/local/dnn/fisher_data_prep.sh b/egs/lre07/v2/local/dnn/fisher_data_prep.sh
index 771c868064d..fc96c491f51 100755
--- a/egs/lre07/v2/local/dnn/fisher_data_prep.sh
+++ b/egs/lre07/v2/local/dnn/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
diff --git a/egs/lre07/v2/local/dnn/fisher_prepare_dict.sh b/egs/lre07/v2/local/dnn/fisher_prepare_dict.sh
index 1ffa4928fd7..a4f3f3c2ca1 100755
--- a/egs/lre07/v2/local/dnn/fisher_prepare_dict.sh
+++ b/egs/lre07/v2/local/dnn/fisher_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# To be run from one directory above this script.
diff --git a/egs/lre07/v2/local/dnn/fisher_train_lms.sh b/egs/lre07/v2/local/dnn/fisher_train_lms.sh
index 354882a3760..3497da32213 100755
--- a/egs/lre07/v2/local/dnn/fisher_train_lms.sh
+++ b/egs/lre07/v2/local/dnn/fisher_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
diff --git a/egs/lre07/v2/local/dnn/remove_dup_utts.sh b/egs/lre07/v2/local/dnn/remove_dup_utts.sh
index 1211e0e04fd..f40a7b781c8 100755
--- a/egs/lre07/v2/local/dnn/remove_dup_utts.sh
+++ b/egs/lre07/v2/local/dnn/remove_dup_utts.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Remove excess utterances once they appear more than a specified
# number of times with the same transcription, in a data set.
diff --git a/egs/lre07/v2/local/dnn/run_nnet2_common.sh b/egs/lre07/v2/local/dnn/run_nnet2_common.sh
index 2d0703b51c0..032282c11ef 100755
--- a/egs/lre07/v2/local/dnn/run_nnet2_common.sh
+++ b/egs/lre07/v2/local/dnn/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Make the features.
diff --git a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh
index 699cbe60542..c8e818e45a5 100755
--- a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh
+++ b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on run_nnet2_multisplice.sh in
# egs/fisher_english/s5/local/online. It has been modified
diff --git a/egs/lre07/v2/local/dnn/train_dnn.sh b/egs/lre07/v2/local/dnn/train_dnn.sh
index dd2469b4009..b779f20d8a0 100755
--- a/egs/lre07/v2/local/dnn/train_dnn.sh
+++ b/egs/lre07/v2/local/dnn/train_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is based on egs/fisher_english/s5/run.sh. It trains a
# multisplice time-delay neural network used in the DNN-based speaker
diff --git a/egs/lre07/v2/local/lre07_eval/lre07_eval.sh b/egs/lre07/v2/local/lre07_eval/lre07_eval.sh
index e12ddccece0..fe185a1c397 100755
--- a/egs/lre07/v2/local/lre07_eval/lre07_eval.sh
+++ b/egs/lre07/v2/local/lre07_eval/lre07_eval.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 David Snyder
# Apache 2.0.
#
diff --git a/egs/lre07/v2/local/split_long_utts.sh b/egs/lre07/v2/local/split_long_utts.sh
index 083954ea288..f8df872d77b 100755
--- a/egs/lre07/v2/local/split_long_utts.sh
+++ b/egs/lre07/v2/local/split_long_utts.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
max_utt_len=60 # 60 seconds.
stage=0
diff --git a/egs/lre07/v2/run.sh b/egs/lre07/v2/run.sh
index e81dd869cc6..f8693234454 100755
--- a/egs/lre07/v2/run.sh
+++ b/egs/lre07/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016-2017 Go-Vivace Inc. (Author: Mousmita Sarma)
#
# Apache 2.0.
diff --git a/egs/madcat_ar/v1/local/chain/compare_wer.sh b/egs/madcat_ar/v1/local/chain/compare_wer.sh
index 7f04061dafb..01e403e8ba9 100755
--- a/egs/madcat_ar/v1/local/chain/compare_wer.sh
+++ b/egs/madcat_ar/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh
index 892ee441516..7478c5acedb 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2017 Chun Chieh Chang
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh
index 7ca7c652fd2..047893b8659 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
index a8bc1836ffe..7fbd52b5965 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1a is the same as chainali_1c but uses the e2e chain model to get the
# lattice alignments and to build a tree
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
index 0828e051dcc..dfd0ad40bfc 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the
# lattice alignments and to build a tree
diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index 3caf8ae4494..ad5f3cd4879 100755
--- a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh
index 9fe588f31b8..dd802417f4f 100755
--- a/egs/madcat_ar/v1/local/extract_features.sh
+++ b/egs/madcat_ar/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/madcat_ar/v1/local/extract_lines.sh b/egs/madcat_ar/v1/local/extract_lines.sh
index ab87836ae3a..c4b5e77a4b2 100755
--- a/egs/madcat_ar/v1/local/extract_lines.sh
+++ b/egs/madcat_ar/v1/local/extract_lines.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Ashish Arora
nj=4
diff --git a/egs/madcat_ar/v1/local/prepare_data.sh b/egs/madcat_ar/v1/local/prepare_data.sh
index 1049db9826d..8a6fa428807 100755
--- a/egs/madcat_ar/v1/local/prepare_data.sh
+++ b/egs/madcat_ar/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/madcat_ar/v1/local/score.sh b/egs/madcat_ar/v1/local/score.sh
index 31564d25326..e1befafd8b2 100755
--- a/egs/madcat_ar/v1/local/score.sh
+++ b/egs/madcat_ar/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/madcat_ar/v1/local/tl/augment_data.sh b/egs/madcat_ar/v1/local/tl/augment_data.sh
index cc44aa58a62..6f2d3cc0217 100755
--- a/egs/madcat_ar/v1/local/tl/augment_data.sh
+++ b/egs/madcat_ar/v1/local/tl/augment_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh
index ccbb7119674..ec73d4dd406 100755
--- a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh
+++ b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# ./local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a/
# System cnn_e2eali_1a
diff --git a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh
index 3fca8cf5fdc..ca7fef9eb85 100755
--- a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh
+++ b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/madcat_ar/v1/local/tl/run_text_localization.sh b/egs/madcat_ar/v1/local/tl/run_text_localization.sh
index 8d12f7d802f..24269d9f479 100755
--- a/egs/madcat_ar/v1/local/tl/run_text_localization.sh
+++ b/egs/madcat_ar/v1/local/tl/run_text_localization.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2018 Ashish Arora
diff --git a/egs/madcat_ar/v1/local/tl/train_lm.sh b/egs/madcat_ar/v1/local/tl/train_lm.sh
index 524bb2e9f40..c37fe64569f 100755
--- a/egs/madcat_ar/v1/local/tl/train_lm.sh
+++ b/egs/madcat_ar/v1/local/tl/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/madcat_ar/v1/local/train_lm.sh b/egs/madcat_ar/v1/local/train_lm.sh
index 903b288a834..c53a6fa8f35 100755
--- a/egs/madcat_ar/v1/local/train_lm.sh
+++ b/egs/madcat_ar/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/madcat_ar/v1/run.sh b/egs/madcat_ar/v1/run.sh
index 01bfdbed543..076f34ced2b 100755
--- a/egs/madcat_ar/v1/run.sh
+++ b/egs/madcat_ar/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh
index 62f4eeb7c71..837c482af0f 100755
--- a/egs/madcat_ar/v1/run_end2end.sh
+++ b/egs/madcat_ar/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2018 Ashish Arora
set -e
diff --git a/egs/madcat_zh/v1/local/chain/compare_wer.sh b/egs/madcat_zh/v1/local/chain/compare_wer.sh
index 4eb665fc702..2e2bc73e01c 100755
--- a/egs/madcat_zh/v1/local/chain/compare_wer.sh
+++ b/egs/madcat_zh/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh
index 164d62a7ad9..17bae9941c4 100755
--- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh
+++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2017 Chun Chieh Chang
diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh
index be51bdcc3d1..7b15b89a549 100755
--- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh
+++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1a is as 1a except it uses chain alignments (using 1a system) instead of gmm alignments
diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh
index aa61620a92f..e14dbda0c39 100755
--- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh
+++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# chainali_1b is as chainali_1a except it has 3 more cnn layers and 1 less tdnn layer.
# ./local/chain/compare_wer.sh exp/chain/cnn_chainali_1a/ exp/chain/cnn_chainali_1b/
diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index ffc9a4c8a14..037fcbaaad2 100755
--- a/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a
diff --git a/egs/madcat_zh/v1/local/extract_features.sh b/egs/madcat_zh/v1/local/extract_features.sh
index 9fe588f31b8..dd802417f4f 100755
--- a/egs/madcat_zh/v1/local/extract_features.sh
+++ b/egs/madcat_zh/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/madcat_zh/v1/local/extract_lines.sh b/egs/madcat_zh/v1/local/extract_lines.sh
index ed752e97e13..391f51a9ea9 100755
--- a/egs/madcat_zh/v1/local/extract_lines.sh
+++ b/egs/madcat_zh/v1/local/extract_lines.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Ashish Arora
nj=4
diff --git a/egs/madcat_zh/v1/local/prepare_data.sh b/egs/madcat_zh/v1/local/prepare_data.sh
index ba35b90b173..33086111426 100755
--- a/egs/madcat_zh/v1/local/prepare_data.sh
+++ b/egs/madcat_zh/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/madcat_zh/v1/local/score.sh b/egs/madcat_zh/v1/local/score.sh
index 31564d25326..e1befafd8b2 100755
--- a/egs/madcat_zh/v1/local/score.sh
+++ b/egs/madcat_zh/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
steps/scoring/score_kaldi_wer.sh "$@"
diff --git a/egs/madcat_zh/v1/local/train_lm.sh b/egs/madcat_zh/v1/local/train_lm.sh
index a8e2dc71f28..d37c8ef110a 100755
--- a/egs/madcat_zh/v1/local/train_lm.sh
+++ b/egs/madcat_zh/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/madcat_zh/v1/run.sh b/egs/madcat_zh/v1/run.sh
index b3ef370c830..c9dba0443ec 100755
--- a/egs/madcat_zh/v1/run.sh
+++ b/egs/madcat_zh/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora
diff --git a/egs/madcat_zh/v1/run_end2end.sh b/egs/madcat_zh/v1/run_end2end.sh
index 7e0fc1e25d1..eefb19a85b6 100755
--- a/egs/madcat_zh/v1/run_end2end.sh
+++ b/egs/madcat_zh/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
set -e
diff --git a/egs/malach/s5/local/chain/compare_wer_general.sh b/egs/malach/s5/local/chain/compare_wer_general.sh
index 9bd017414ab..7d36c298396 100755
--- a/egs/malach/s5/local/chain/compare_wer_general.sh
+++ b/egs/malach/s5/local/chain/compare_wer_general.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
echo -n "System "
for x in $*; do printf " % 10s" $x; done
diff --git a/egs/malach/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/malach/s5/local/chain/tuning/run_tdnn_1a.sh
index 007e94ef1a3..69995f0b8aa 100644
--- a/egs/malach/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/malach/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 IBM Corp. (Author: Michael Picheny) Adapted AMI recipe to MALACH corpus
diff --git a/egs/malach/s5/local/malach_data_prep.sh b/egs/malach/s5/local/malach_data_prep.sh
index 174adf9ce0e..4b812e7ee39 100755
--- a/egs/malach/s5/local/malach_data_prep.sh
+++ b/egs/malach/s5/local/malach_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
# 2016 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/malach/s5/local/malach_prepare_dict.sh b/egs/malach/s5/local/malach_prepare_dict.sh
index 4c3c039f74a..e4638b7693e 100755
--- a/egs/malach/s5/local/malach_prepare_dict.sh
+++ b/egs/malach/s5/local/malach_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#adapted from fisher dict preparation script, Author: Pawel Swietojanski
# Copyright 2019 IBM Corp. (Author: Michael Picheny) Adapted AMI recipe to MALACH corpus
diff --git a/egs/malach/s5/local/malach_scoring_data_prep.sh b/egs/malach/s5/local/malach_scoring_data_prep.sh
index 8c9c79a1fd6..1b1b1c83492 100755
--- a/egs/malach/s5/local/malach_scoring_data_prep.sh
+++ b/egs/malach/s5/local/malach_scoring_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/malach/s5/local/malach_text_prep.sh b/egs/malach/s5/local/malach_text_prep.sh
index 55885c66ce9..fcb8d17fb18 100755
--- a/egs/malach/s5/local/malach_text_prep.sh
+++ b/egs/malach/s5/local/malach_text_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 IBM Corp. (Author: Michael Picheny) Adapted AMI recipe to MALACH corpus
# Copyright 2015, Brno University of Technology (Author: Karel Vesely)
diff --git a/egs/malach/s5/local/malach_train_lms.sh b/egs/malach/s5/local/malach_train_lms.sh
index c4919022bf1..722ba4cfffd 100755
--- a/egs/malach/s5/local/malach_train_lms.sh
+++ b/egs/malach/s5/local/malach_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019, IBM Research (Author: Michael Picheny) Adapted AMI recipe to MALACH Corpus
# Copyright 2013 Arnab Ghoshal, Pawel Swietojanski
diff --git a/egs/malach/s5/local/nnet3/prepare_lores_feats.sh b/egs/malach/s5/local/nnet3/prepare_lores_feats.sh
index 5601fcf7dd6..17822f0283d 100755
--- a/egs/malach/s5/local/nnet3/prepare_lores_feats.sh
+++ b/egs/malach/s5/local/nnet3/prepare_lores_feats.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/malach/s5/local/nnet3/run_ivector_common.sh b/egs/malach/s5/local/nnet3/run_ivector_common.sh
index d841b8f50ef..e179f319815 100755
--- a/egs/malach/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/malach/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1a.sh b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
index 7205a3adcd0..392b9e6c819 100755
--- a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
+++ b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -x
diff --git a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1b.sh b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
index db01b3ecbf4..72fcd135a0a 100755
--- a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
+++ b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
index 278ee345d50..0a96b10b8a9 100755
--- a/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
+++ b/egs/malach/s5/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (author: Daniel Povey) Tony Robinson
# 2017 Hainan Xu
diff --git a/egs/malach/s5/local/run_cleanup_segmentation.sh b/egs/malach/s5/local/run_cleanup_segmentation.sh
index c2c730f1a9e..778fe96d2e7 100755
--- a/egs/malach/s5/local/run_cleanup_segmentation.sh
+++ b/egs/malach/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 IBM (Michael Picheny) Adapted from AMI recipe for MALACH Corpus
# Copyright 2016 Vimal Manohar
diff --git a/egs/malach/s5/local/score.sh b/egs/malach/s5/local/score.sh
index 00cc0c0f1a6..e8859c5011c 100755
--- a/egs/malach/s5/local/score.sh
+++ b/egs/malach/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -x
diff --git a/egs/malach/s5/local/score_asclite.sh b/egs/malach/s5/local/score_asclite.sh
index a47e66581ad..2e5bb20f000 100755
--- a/egs/malach/s5/local/score_asclite.sh
+++ b/egs/malach/s5/local/score_asclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -x
diff --git a/egs/malach/s5/run.sh b/egs/malach/s5/run.sh
index b8961d4df3c..57be778bb3f 100755
--- a/egs/malach/s5/run.sh
+++ b/egs/malach/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
diff --git a/egs/mandarin_bn_bc/s5/README b/egs/mandarin_bn_bc/s5/README
new file mode 100644
index 00000000000..8c5b111acb5
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/README
@@ -0,0 +1,34 @@
+This recipe contains the following corpora from LDC:
+
+Audio:
+ Gale phase 2/3/4
+ LDC2013S08
+ LDC2013S04
+ LDC2014S09
+ LDC2015S06
+ LDC2015S13
+ LDC2016S03
+ LDC2017S25
+
+ TDT 2/3/4
+ LDC2001S93
+ LDC2001S95
+ LDC2005S11
+
+Text:
+ Gale phase 2/3/4
+ LDC2013T20
+ LDC2013T08
+ LDC2014T28
+ LDC2015T09
+ LDC2015T25
+ LDC2016T12
+ LDC2017T18
+
+ TDT 2/3/4
+ LDC2001T57
+ LDC2001T58
+ LDC2005T16
+ Besides, it uses the Gigaword corpus (simplified Mandarin) for LM training and for expanding the dictionary:
+ Gigaword (xin: simplified, cna: traditional. Use only xin)
+ LDC2003T09
diff --git a/egs/mandarin_bn_bc/s5/RESULTS b/egs/mandarin_bn_bc/s5/RESULTS
new file mode 100644
index 00000000000..dcb541497e9
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/RESULTS
@@ -0,0 +1,15 @@
+# In the results below, "large_test" is the pruned 4-gram LM, which is used for
+# lattice generation.
+
+# Results with nnet3 tdnn+chain model
+# local/chain/run_tdnn.sh
+# (4-epoch training on speed-perturbed and volume-perturbed "cleaned" data and left-biphone model)
+# num_params=20.7 M
+%CER 8.83 [ 7901 / 89515, 929 ins, 1738 del, 5234 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_dev_large_test/cer_9_0.0
+%CER 9.03 [ 17749 / 196659, 3770 ins, 3988 del, 9991 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_eval_large_test/cer_10_0.0
+
+# Results with RNNLM rescoring of tdnn+chain model
+%CER 8.49 [ 7600 / 89515, 863 ins, 1805 del, 4932 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_dev_large_test_rnnlm_1a_nbest_rescore/cer_8_0.0
+%CER 8.47 [ 7585 / 89515, 783 ins, 2027 del, 4775 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_dev_large_test_rnnlm_1a_rescore/cer_9_0.0
+%CER 8.82 [ 17342 / 196659, 3891 ins, 3809 del, 9642 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_eval_large_test_rnnlm_1a_nbest_rescore/cer_8_0.0
+%CER 8.72 [ 17142 / 196659, 3876 ins, 3766 del, 9500 sub ] exp/chain_cleanup/tdnn_1d_sp/decode_eval_large_test_rnnlm_1a_rescore/cer_9_0.0
diff --git a/egs/mandarin_bn_bc/s5/cmd.sh b/egs/mandarin_bn_bc/s5/cmd.sh
new file mode 100644
index 00000000000..b2f193f08ac
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/cmd.sh
@@ -0,0 +1,18 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl --mem 2G --config conf/queue.conf --allow-a09 false"
+export decode_cmd="queue.pl --mem 4G --config conf/queue.conf --allow-a09 false"
+export mkgraph_cmd="queue.pl --mem 8G --config conf/queue.conf --allow-a09 false"
+# the use of cuda_cmd is deprecated, but it's still used in this example
+# directory.
+export cuda_cmd="queue.pl --gpu 1 --config conf/queue.conf"
diff --git a/egs/mandarin_bn_bc/s5/conf/cmu2pinyin b/egs/mandarin_bn_bc/s5/conf/cmu2pinyin
new file mode 100644
index 00000000000..c02eb600fcc
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/cmu2pinyin
@@ -0,0 +1,39 @@
+AA A
+AE A
+AH A
+AO UO
+AW U
+AY AI
+B B
+CH CH
+D D
+DH S I
+EH AI
+ER E
+EY AI
+F F
+G G
+HH H
+IH I
+IY I
+JH ZH
+K K
+L L
+M M
+N N
+NG N
+OW UO
+OY UO
+P P
+R R
+S S
+SH SH
+T T
+TH S
+UH U
+UW U
+V W
+W W
+Y Y
+Z Z
+ZH X
diff --git a/egs/mandarin_bn_bc/s5/conf/decode.config b/egs/mandarin_bn_bc/s5/conf/decode.config
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/egs/mandarin_bn_bc/s5/conf/decode_dnn.config b/egs/mandarin_bn_bc/s5/conf/decode_dnn.config
new file mode 100644
index 00000000000..89dd9929a62
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/decode_dnn.config
@@ -0,0 +1,2 @@
+beam=18.0 # beam for decoding. Was 13.0 in the scripts.
+lattice_beam=10.0 # this has most effect on size of the lattices.
diff --git a/egs/mandarin_bn_bc/s5/conf/fbank.conf b/egs/mandarin_bn_bc/s5/conf/fbank.conf
new file mode 100644
index 00000000000..62f6dc83b48
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/fbank.conf
@@ -0,0 +1,3 @@
+# No non-default options for now.
+--sample-frequency=16000
+--num-mel-bins=30
diff --git a/egs/mandarin_bn_bc/s5/conf/mfcc.conf b/egs/mandarin_bn_bc/s5/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/mandarin_bn_bc/s5/conf/mfcc_hires.conf b/egs/mandarin_bn_bc/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..c8ad04dae66
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--sample-frequency=16000 # the data is sampled at 16kHz
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=40 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/mandarin_bn_bc/s5/conf/online_cmvn.conf b/egs/mandarin_bn_bc/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/mandarin_bn_bc/s5/conf/online_pitch.conf b/egs/mandarin_bn_bc/s5/conf/online_pitch.conf
new file mode 100644
index 00000000000..e959a19d5b8
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/online_pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/mandarin_bn_bc/s5/conf/pinyin2cmu b/egs/mandarin_bn_bc/s5/conf/pinyin2cmu
new file mode 100644
index 00000000000..a6e53620479
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/pinyin2cmu
@@ -0,0 +1,58 @@
+A AA
+AI AY
+AN AE N
+ANG AE NG
+AO AW
+B B
+CH CH
+C T S
+D D
+E ER
+EI EY
+EN AH N
+ENG AH NG
+ER AA R
+F F
+G G
+H HH
+IA IY AA
+IANG IY AE NG
+IAN IY AE N
+IAO IY AW
+IE IY EH
+I IY
+ING IY NG
+IN IY N
+IONG IY UH NG
+IU IY UH
+J J
+K K
+L L
+M M
+N N
+O AO
+ONG UH NG
+OU OW
+P P
+Q Q
+R R
+SH SH
+S S
+T T
+UAI UW AY
+UANG UW AE NG
+UAN UW AE N
+UA UW AA
+UI UW IY
+UN UW AH N
+UO UW AO
+U UW
+UE IY EH
+VE IY EH
+V IY UW
+VN IY N
+W W
+X X
+Y Y
+ZH JH
+Z Z
diff --git a/egs/mandarin_bn_bc/s5/conf/pitch.conf b/egs/mandarin_bn_bc/s5/conf/pitch.conf
new file mode 100644
index 00000000000..e959a19d5b8
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/mandarin_bn_bc/s5/conf/queue.conf b/egs/mandarin_bn_bc/s5/conf/queue.conf
new file mode 100644
index 00000000000..dfedb6424bf
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/conf/queue.conf
@@ -0,0 +1,13 @@
+# Default configuration
+command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
+option mem=* -l mem_free=$0,ram_free=$0
+option mem=0 # Do not add anything to qsub_opts
+option num_threads=* -pe smp $0
+option num_threads=1 # Do not add anything to qsub_opts
+option max_jobs_run=* -tc $0
+default gpu=0
+option gpu=0 -q all.q
+option gpu=* -l gpu=$0 -q g.q
+default allow_a09=false
+option allow_a09=true
+option allow_a09=false -l 'hostname=!a09*&!a17*&!a13*&!a14*&!a10*&!a11*&!c16*'
diff --git a/egs/mandarin_bn_bc/s5/local/chain/run_chain_common.sh b/egs/mandarin_bn_bc/s5/local/chain/run_chain_common.sh
new file mode 100755
index 00000000000..85c8589dc78
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/chain/run_chain_common.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+
+# this script has common stages shared across librispeech chain recipes.
+# It generates a new topology in a new lang directory, gets the alignments as
+# lattices, and builds a tree for the new topology
+set -e
+
+stage=11
+
+# input directory names. These options are actually compulsory, and they have
+# been named for convenience
+gmm_dir=
+ali_dir=
+ali_nj=
+lores_train_data_dir=
+lang_original=
+num_leaves=6000
+
+# output directory names. They are also compulsory.
+lang=
+lang_original=
+lat_dir=
+tree_dir=
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+[ -z $lang ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1;
+[ -z $lat_dir ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1;
+[ -z $tree_dir ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1;
+
+for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do
+ [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
+done
+
+if [ $stage -le 11 ]; then
+ echo "$0: creating lang directory with one state per phone."
+ # Create a version of the lang/ directory that has one state per phone in the
+ # topo file. [note, it really has two states.. the first one is only repeated
+ # once, the second one has zero or more repeats.]
+ if [ -d $lang ]; then
+ if [ $lang/L.fst -nt $lang_original/L.fst ]; then
+ echo "$0: $lang already exists, not overwriting it; continuing"
+ else
+ echo "$0: $lang already exists and seems to be older than data/lang..."
+ echo " ... not sure what to do. Exiting."
+ exit 1;
+ fi
+ else
+ cp -r $lang_original $lang
+ silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
+ nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
+ # Use our special topology... note that later on may have to tune this
+ # topology.
+ steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
+ fi
+fi
+
+if [ $stage -le 12 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ nj=$(cat ${ali_dir}/num_jobs) || exit 1;
+ steps/align_fmllr_lats.sh --nj $ali_nj --cmd "$train_cmd" ${lores_train_data_dir} \
+ $lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 13 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" $num_leaves ${lores_train_data_dir} $lang $ali_dir $tree_dir
+fi
+
+exit 0;
diff --git a/egs/mandarin_bn_bc/s5/local/chain/run_tdnn.sh b/egs/mandarin_bn_bc/s5/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..e1adaa9346d
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1d.sh
\ No newline at end of file
diff --git a/egs/mandarin_bn_bc/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/mandarin_bn_bc/s5/local/chain/tuning/run_tdnn_1d.sh
new file mode 100755
index 00000000000..3c4f53eee7b
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/chain/tuning/run_tdnn_1d.sh
@@ -0,0 +1,205 @@
+#!/usr/bin/env bash
+set -e
+
+
+# configs for 'chain'
+stage=-1
+decode_nj=60
+ali_nj=80
+train_set=train_gale_tdt_cleanup
+gmm=tri6b_cleanup
+nnet3_affix=_cleanup
+lang_affix="_large_test"
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1d
+tree_affix=
+train_stage=-8
+get_egs_stage=-10
+decode_iter=
+
+# TDNN options
+frames_per_eg=150,110,100
+remove_egs=true
+common_egs_dir=
+xent_regularize=0.1
+dropout_schedule='0,0@0.20,0.5@0.50,0'
+
+test_online_decoding=true # if true, it will run the last decoding stage.
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+gmm_dir=exp/$gmm
+ali_dir=exp/${gmm}_sp_ali
+tree_dir=exp/chain${nnet3_affix}/tree_sp${tree_affix:+_$tree_affix}
+lang=data/lang${lang_affix}_chain
+lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_lats
+dir=exp/chain${nnet3_affix}/tdnn${affix:+_$affix}_sp
+train_data_dir=data/${train_set}_sp_hires
+lores_train_data_dir=data/${train_set}_sp
+train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_nopitch
+if ! cuda-compiled; then
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=$ivector_dim name=ivector
+ input dim=$feat_dim name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+ linear-component name=prefinal-l dim=256 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+if [ $stage -le 15 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b{09,10,11,12}/$USER/kaldi-data/egs/mandarin-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/chain/train.py --stage $train_stage \
+ --cmd "$train_cmd" \
+ --feat.online-ivector-dir $train_ivector_dir \
+ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient 0.1 \
+ --chain.l2-regularize 0.0 \
+ --chain.apply-deriv-weights false \
+ --chain.lm-opts="--num-extra-lm-states=2000" \
+ --egs.dir "$common_egs_dir" \
+ --egs.stage $get_egs_stage \
+ --egs.opts "--frames-overlap-per-eg 0 --constrained false" \
+ --egs.chunk-width $frames_per_eg \
+ --trainer.dropout-schedule $dropout_schedule \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.num-chunk-per-minibatch 64 \
+ --trainer.frames-per-iter 2500000 \
+ --trainer.num-epochs 4 \
+ --trainer.optimization.num-jobs-initial 3 \
+ --trainer.optimization.num-jobs-final 16 \
+ --trainer.optimization.initial-effective-lrate 0.00015 \
+ --trainer.optimization.final-effective-lrate 0.000015 \
+ --trainer.max-param-change 2.0 \
+ --cleanup.remove-egs $remove_egs \
+ --feat-dir $train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir $lat_dir \
+ --dir $dir || exit 1;
+
+fi
+
+echo "Train chain tdnn succeeded !"
+graph_dir=$dir/graph${lang_affix}
+if [ $stage -le 16 ]; then
+ # Note: it might appear that this $lang directory is mismatched, and it is as
+ # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+ # the lang directory.
+ utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang${lang_affix} $dir $graph_dir
+ # remove <UNK> from the graph, and convert back to const-FST.
+ fstrmsymbols --apply-to-output=true --remove-arcs=true "echo 3|" $graph_dir/HCLG.fst - | \
+ fstconvert --fst_type=const > $graph_dir/temp.fst
+ mv $graph_dir/temp.fst $graph_dir/HCLG.fst
+fi
+
+echo "Decoding "
+iter_opts=
+for t in dev eval; do
+ ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${t}_hires_nopitch
+ steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
+ --nj $decode_nj --cmd "$decode_cmd" $iter_opts \
+ --online-ivector-dir "$ivector_dir" \
+ $graph_dir data/${t}_hires $dir/decode_${t}_large_test || exit 1
+done
+exit 0;
diff --git a/egs/mandarin_bn_bc/s5/local/check_oov_rate.sh b/egs/mandarin_bn_bc/s5/local/check_oov_rate.sh
new file mode 100644
index 00000000000..6c655205e31
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/check_oov_rate.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+# This script checks the Out Of Vocabulary words rate of given data set.
+
+if [ $# -ne 2 ]; then
+ echo "Usage: $0 <lexicon> <text-file>"
+ exit 1
+fi
+lex=$1
+fname=$2
+
+cat $fname | awk '{for(n=2;n<=NF;n++) { print $n; }}' | perl -e '
+ $lex = shift @ARGV; open(L, "<$lex")||die;
+ while(<L>){ @A=split; $seen{$A[0]}=1;}
+ while(<STDIN>) {
+ @A=split;
+ $word=$A[0];
+ $tot++;
+ if(defined $seen{$word}) {
+ $invoc++;
+ } else {print "OOV word $word\n";}
+ }
+ $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+ printf("Seen $invoc out of $tot tokens; OOV rate is %.2f\n", $oov_rate);
+ ' $lex
diff --git a/egs/mandarin_bn_bc/s5/local/create_oov_char_lexicon.pl b/egs/mandarin_bn_bc/s5/local/create_oov_char_lexicon.pl
new file mode 100755
index 00000000000..33e2e8061c3
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/create_oov_char_lexicon.pl
@@ -0,0 +1,48 @@
+#!/usr/bin/env perl
+# Copyright 2016 Alibaba Robotics Corp. (Author: Xingyu Na)
+#
+# A script for char-based Chinese OOV lexicon generation.
+#
+# Input 1: char-based dictionary, example
+# CHAR1 ph1 ph2
+# CHAR2 ph3
+# CHAR3 ph2 ph4
+#
+# Input 2: OOV word list, example
+# WORD1
+# WORD2
+# WORD3
+#
+# where WORD1 is in the format of "CHAR1CHAR2".
+#
+# Output: OOV lexicon, in the format of normal lexicon
+
+if($#ARGV != 1) {
+ print STDERR "usage: perl create_oov_char_lexicon.pl chardict oovwordlist > oovlex\n\n";
+ print STDERR "### chardict: a dict in which each line contains the pronunciation of one Chinese char\n";
+ print STDERR "### oovwordlist: OOV word list\n";
+ print STDERR "### oovlex: output OOV lexicon\n";
+ exit;
+}
+
+use utf8;
+my %prons;
+open(DICT, $ARGV[0]) || die("Can't open dict ".$ARGV[0]."\n");
+binmode(DICT,":encoding(utf8)");
+foreach (<DICT>) {
+ chomp; @A = split(" ", $_); $prons{$A[0]} = $A[1];
+}
+close DICT;
+
+open(WORDS, $ARGV[1]) || die("Can't open oov word list ".$ARGV[1]."\n");
+binmode(WORDS,":encoding(utf8)");
+while (<WORDS>) {
+ chomp;
+ print $_;
+ @A = split("", $_);
+ foreach (@A) {
+ print " $prons{$_}";
+ }
+ print "\n";
+}
+close WORDS;
diff --git a/egs/mandarin_bn_bc/s5/local/gale_bad_utts b/egs/mandarin_bn_bc/s5/local/gale_bad_utts
new file mode 100644
index 00000000000..2dd361f58a9
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_bad_utts
@@ -0,0 +1,100 @@
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20070308_040701
+CCTV2_ECONOMYANDLAW_CMN_20070426_202800
+CCTV2_ECONOMYANDLAW_CMN_20070426_202800(1)
+CCTV2_LIANGHUI_PROBLEM_20070308_213000
+CCTV4_TDYFOCUS_CMN_20070824_092801
+VOA_ISSUESANDOPINIONS_CMN_20070801_210500
+VOA_ISSUESANDOPINIONS_CMN_20070926_210500
+VOA_LISTENERSHOTLINE_CMN_20070906_223000
+VOA_LISTENERSHOTLINE_CMN_20070926_223000
+VOA_LISTENERSHOTLINE_CMN_20070927_223000
+PHOENIX_NEWSLINE_CMN_20070101_114800
+PHOENIX_NEWSLINE_CMN_20070101_114800(1)
+CCTV2_ECONOMYANDLAW_CMN_20070426_202800(1)
+CCTV2_LIANGHUI_PROBLEM_20070308_213000
+CCTV4_TDYFOCUS_CMN_20070824_092801
+CCTV4_TDYFOCUS_CMN_20071004_092800
+CCTV4_TDYFOCUS_CMN_20071008_092801
+CCTV4_TDYFOCUS_CMN_20071012_092801
+CCTVNEWS_LIANGHUIZHICHUANG_PROBLEM_20070309_085702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070403_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070404_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070405_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070409_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070703_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070704_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070705_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070706_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070712_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070713_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070716_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070717_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070718_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070719_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070810_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070813_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070814_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070815_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070816_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070817_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070820_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070821_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070822_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070823_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070824_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070827_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070828_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070829_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070830_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070905_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070906_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070907_215702
+CCTVNEWS_PEOPLEINNEWS_CMN_20070910_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070911_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070912_215701
+CCTVNEWS_PEOPLEINNEWS_CMN_20070913_215701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070401_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070408_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070708_140702
+CCTVNEWS_TELLITLIKEITIS_CMN_20070715_140702
+CCTVNEWS_TELLITLIKEITIS_CMN_20070812_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070819_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070826_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070902_140701
+CCTVNEWS_TELLITLIKEITIS_CMN_20070909_140701
+HUBEI_COMMUNICATE_CMN_20070401_230202
+HUBEI_COMMUNICATE_CMN_20070408_225927
+HUBEI_COMMUNICATE_CMN_20070701_222922
+HUBEI_COMMUNICATE_CMN_20070708_222931
+HUBEI_COMMUNICATE_CMN_20070715_222707
+HUBEI_COMMUNICATE_CMN_20070826_223006
+HUBEI_COMMUNICATE_CMN_20070902_223950
+HUBEI_COMMUNICATE_CMN_20070909_222959
+PHOENIX_ASIANJRNL_CMN_20070102_075800(1)
+PHOENIX_ASIANJRNL_CMN_20070103_075800(1)
+PHOENIX_ASIANJRNL_CMN_20070104_075800(1)
+PHOENIX_ASIANJRNL_CMN_20070108_075800(1)
+PHOENIX_ASIANJRNL_CMN_20070109_075800(1)
+PHOENIX_ASIANJRNL_CMN_20080217_085801
+PHOENIX_ASIANJRNL_CMN_20080224_085801
+PHOENIX_ASIANJRNL_CMN_20080311_085801
+PHOENIX_BEHINDHL_CMN_20080227_082800
+PHOENIX_BEHINDHL_CMN_20080306_082801
+PHOENIX_BEHINDHL_CMN_20080308_082801
+PHOENIX_DATELUYU_CMN_20080201_142801
+PHOENIX_NEWSHACK_CMN_20070407_212300
+PHOENIX_NEWSLINE_CMN_20070101_114800(1)
+PHOENIX_NEWSLINE_CMN_20080306_114801
+PHOENIX_SOCWATCH_CMN_20070802_225801
+PHOENIX_SOCWATCH_CMN_20070816_225801
+PHOENIX_SOCWATCH_CMN_20070823_225801
+PHOENIX_SOCWATCH_CMN_20070906_225800
+PHOENIX_SOCWATCH_CMN_20070913_225801
+PHOENIX_SOCWATCH_CMN_20080117_225800
+PHOENIX_SOCWATCH_CMN_20080131_225800
+PHOENIX_SOCWATCH_CMN_20080214_225801
+VOA_ISSUESANDOPINIONS_CMN_20070801_210500
+VOA_ISSUESANDOPINIONS_CMN_20070926_210500
+VOA_LISTENERSHOTLINE_CMN_20070906_223000
+VOA_LISTENERSHOTLINE_CMN_20070926_223000
+VOA_LISTENERSHOTLINE_CMN_20070927_223000
diff --git a/egs/mandarin_bn_bc/s5/local/gale_data_prep_audio.sh b/egs/mandarin_bn_bc/s5/local/gale_data_prep_audio.sh
new file mode 100755
index 00000000000..0ea6cfcf9f9
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_data_prep_audio.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Copyright 2016 Johns Hopkins University (author: Jan "Yenda" Trmal)
+# Apache 2.0
+
+
+echo $0 "$@"
+
+galeData=$(utils/make_absolute.sh "${@: -1}" );
+wavedir=$galeData/wav
+mkdir -p $wavedir
+
+
+length=$(($#-1))
+args=${@:1:$length}
+
+# check that sox is installed
+which sox &>/dev/null
+if [[ $? != 0 ]]; then
+ echo "$0: sox is not installed"
+ exit 1
+fi
+
+set -e -o pipefail
+
+for var in $args; do
+ CD=$(basename $var)
+ [ -d $wavedir/$CD ] && rm -rf $wavedir/$CD
+ mkdir -p $wavedir/$CD
+ find $var -type f -name *.wav | while read file; do
+ f=$(basename $file)
+ if [[ ! -L "$wavedir/$CD/$f" ]]; then
+ ln -sf $file $wavedir/$CD/$f
+ fi
+ done
+
+ # make a flac symlink as well
+ find $var -type f -name *.flac | while read file; do
+ f=$(basename $file)
+
+ if [[ ! -L "$wavedir/$CD/$f" ]]; then
+ ln -sf $file $wavedir/$CD/$f
+ fi
+ done
+done
+
+#figure out the proper sox command line
+#the flac will be converted on the fly
+(
+ for w in `find $wavedir -name *.wav` ; do
+ base=`basename $w .wav`
+ fullpath=`utils/make_absolute.sh $w`
+ echo "$base sox $fullpath -r 16000 -t wav - |"
+ done
+
+ for w in `find $wavedir -name *.flac` ; do
+ base=`basename $w .flac`
+ fullpath=`utils/make_absolute.sh $w`
+ echo "$base sox $fullpath -r 16000 -t wav - |"
+ done
+) | sort -u > $galeData/wav.scp
+
+#clean
+rm -fr $galeData/id$$ $galeData/wav$$
+echo "$0: data prep audio succeeded"
+
+exit 0
+
diff --git a/egs/mandarin_bn_bc/s5/local/gale_data_prep_split.sh b/egs/mandarin_bn_bc/s5/local/gale_data_prep_split.sh
new file mode 100755
index 00000000000..b580bb5b76e
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_data_prep_split.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 (author: Ahmed Ali, Hainan Xu)
+# Copyright 2016 Johns Hopkins University (author: Jan "Yenda" Trmal)
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+if [ $# -ne 2 ]; then
+ echo "Arguments should be the <gale-data-dir> <local-dir>"; exit 1
+fi
+
+set -e -o pipefail
+# data will be in data/local
+mkdir -p $2
+galeData=$(utils/make_absolute.sh $1)
+dir=$(utils/make_absolute.sh $2)
+
+
+# some problem with the text data; same utt id but different transcription
+cat $galeData/all | awk '{print$2}' | \
+ sort | uniq -c | awk '{if($1!="1")print$2}' > $galeData/dup.list
+
+# same time duration but different transcription (multiple speaker speaks at same time)
+cat $galeData/all | awk '{print $1" "$3" "$4}' | \
+ sort | uniq -c | awk '{if($1!="1")print $2" "$3" "$4}' > $galeData/dup.segment
+awk 'NR==FNR{a[$1$2$3];next} $1$3$4 in a {print $2}' $galeData/dup.segment $galeData/all >> $galeData/dup.list
+
+utils/filter_scp.pl --exclude -f 2 \
+ $galeData/dup.list $galeData/all > $galeData/all.nodup
+
+mv $galeData/all $galeData/all.orig
+mv $galeData/all.nodup $galeData/all
+
+diff <(awk '{print $1}' $galeData/all | sort | uniq) \
+ <(awk '{print $1}' $galeData/wav.scp | sort | uniq) |\
+ grep '>\|<' | cut -d " " -f2- > $galeData/bad_utts
+grep -f <(cat local/gale_dev/test.LDC*) $galeData/all | grep -v -F -f $galeData/bad_utts > $galeData/all.dev
+
+grep -f <(cat local/gale_eval/test.LDC*) $galeData/all | grep -v -F -f $galeData/bad_utts > $galeData/all.eval
+
+# Only parts of the eval transcriptions will be used. We select them from the given segmentation information
+mv $galeData/all.eval $galeData/all.eval.tmp
+cat local/gale_eval/test.*.segment > $galeData/eval.segments.dur
+awk 'NR==FNR{a[$1$2$3];next} $1$3$4 in a {print $0}' $galeData/eval.segments.dur $galeData/all.eval.tmp \
+ > $galeData/all.eval
+rm $galeData/all.eval.tmp
+
+grep -v -f <(cat local/gale_dev/test.LDC*) $galeData/all |\
+ grep -v -f <(cat local/gale_eval/test.LDC*) |\
+ grep -v -F -f $galeData/bad_utts > $galeData/all.train
+
+cat $galeData/all.dev | awk '{print$2}' > $galeData/dev_utt_list
+cat $galeData/all.eval | awk '{print$2}' > $galeData/eval_utt_list
+cat $galeData/all.train | awk '{print$2}' > $galeData/train_utt_list
+
+mkdir -p $dir/dev
+mkdir -p $dir/eval
+mkdir -p $dir/train
+utils/filter_scp.pl -f 1 $galeData/dev_utt_list $galeData/utt2spk > $dir/dev/utt2spk
+utils/utt2spk_to_spk2utt.pl $dir/dev/utt2spk | sort -u > $dir/dev/spk2utt
+
+utils/filter_scp.pl -f 1 $galeData/eval_utt_list $galeData/utt2spk > $dir/eval/utt2spk
+utils/utt2spk_to_spk2utt.pl $dir/eval/utt2spk | sort -u > $dir/eval/spk2utt
+
+utils/filter_scp.pl -f 1 $galeData/train_utt_list $galeData/utt2spk > $dir/train/utt2spk
+utils/utt2spk_to_spk2utt.pl $dir/train/utt2spk | sort -u > $dir/train/spk2utt
+
+for x in dev eval train; do
+ outdir=$dir/$x
+ file=$galeData/all.$x
+ mkdir -p $outdir
+ awk '{print $2 " " $1 " " $3 " " $4}' $file | sort -u > $outdir/segments
+ awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $file | sort -u > $outdir/text
+done
+
+cat $dir/dev/segments | awk '{print$2}' | sort -u > $galeData/dev.wav.list
+cat $dir/eval/segments | awk '{print$2}' | sort -u > $galeData/eval.wav.list
+cat $dir/train/segments | awk '{print$2}' | sort -u > $galeData/train.wav.list
+
+utils/filter_scp.pl -f 1 $galeData/dev.wav.list $galeData/wav.scp > $dir/dev/wav.scp
+utils/filter_scp.pl -f 1 $galeData/eval.wav.list $galeData/wav.scp > $dir/eval/wav.scp
+utils/filter_scp.pl -f 1 $galeData/train.wav.list $galeData/wav.scp > $dir/train/wav.scp
+
+cat $galeData/wav.scp | awk -v seg=$dir/train/segments 'BEGIN{while((getline < seg) > 0) {seen[$2]=1;}}
+ {if (seen[$1]) { print $0}}' > $dir/train/wav.scp
+
+
+echo Gale data prep split succeeded
diff --git a/egs/mandarin_bn_bc/s5/local/gale_data_prep_txt.sh b/egs/mandarin_bn_bc/s5/local/gale_data_prep_txt.sh
new file mode 100755
index 00000000000..53d391f88a6
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_data_prep_txt.sh
@@ -0,0 +1,126 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 (author: Ahmed Ali, Hainan Xu)
+# Copyright 2016 Johns Hopkins University (author: Jan "Yenda" Trmal)
+# Apache 2.0
+
+echo $0 "$@"
+export LC_ALL=C
+
+galeData=$(utils/make_absolute.sh "${@: -1}" );
+
+length=$(($#-1))
+args=${@:1:$length}
+
+top_pwd=`pwd`
+txtdir=$galeData/txt
+mkdir -p $txtdir
+
+cd $txtdir
+
+for cdx in ${args[@]}; do
+ echo "Preparing $cdx"
+ if [[ $cdx == *.tgz ]] ; then
+ tar -xvf $cdx
+ elif [ -d "$cdx" ]; then
+ tgt=$(basename $cdx)
+ test -x $tgt || ln -s $cdx `basename $tgt`
+ else
+ echo "I don't really know what I shall do with $cdx " >&2
+ fi
+done
+
+find -L . -type f -name *.tdf | while read file; do
+sed '1,3d' $file
+done > all.tmp
+
+perl -e '
+ ($inFile,$idFile,$txtFile,$spk,$mapf)= split /\s+/, $ARGV[0];
+ open(IN, "$inFile");
+ open(ID, ">$idFile");
+ open(TXT, ">$txtFile");
+ open(SPK, ">$spk");
+ open(MAP, ">$mapf");
+ while (<IN>) {
+ @arr= split /\t/,$_;
+ $arr[4] =~ s/ //g;
+ $arr[4] = sprintf("%020s", $arr[4]);
+ $spkid = "$arr[0]_$arr[4]";
+ $spkfix = sprintf("%080s", $spkid);
+
+ $start=sprintf ("%0.3f",$arr[2]);
+ $rStart=$start;
+ $start=~s/\.//;
+ $start=~s/^0+$/0/;
+ $start=~s/^0+([^0])/$1/; # remove zeros at the beginning
+ $start = sprintf("%09s", $start);
+
+ $end=sprintf ("%0.3f",$arr[3]);
+ $rEnd=$end;
+ $end=~s/^0+([^0])/$1/;
+ $end=~s/\.//;
+ $end = sprintf("%09s", $end);
+
+ $id="$arr[11] $arr[0] ${spkfix}_$arr[0]_${start}_${end} $rStart $rEnd\n";
+ next if ($rStart == $rEnd);
+ $id =~ s/.sph//g;
+ print ID $id;
+ print TXT "$arr[7]\n";
+ print SPK "${spkfix}_$arr[0]_${start}_${end} ${spkfix}\n";
+ print MAP "$arr[0] ${spkfix}_$arr[0]\n";
+ }' "all.tmp allid.tmp contentall.tmp utt2spk.tmp map.tmp"
+
+perl -p -i -e 's=/.$==g' contentall.tmp
+
+cd $top_pwd
+
+
+pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'`
+export PYTHONPATH=$PYTHONPATH:`pwd`/tools/mmseg-1.3.0/lib/python${pyver}/site-packages
+if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "--- Downloading mmseg-1.3.0 ..."
+ echo "NOTE: it assumes that you have Python, Setuptools installed on your system!"
+ wget -P tools http://pypi.python.org/packages/source/m/mmseg/mmseg-1.3.0.tar.gz
+ tar xf tools/mmseg-1.3.0.tar.gz -C tools
+ cd tools/mmseg-1.3.0
+ mkdir -p lib/python${pyver}/site-packages
+ CC=gcc CXX=g++ python setup.py build
+ python setup.py install --prefix=.
+ cd ../..
+ if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "mmseg is not found - installation failed?"
+ exit 1
+ fi
+fi
+
+cat $txtdir/contentall.tmp |\
+ sed -e 's/,//g' |\
+ sed -e 's/<foreign language=\"[a-zA-Z]*\">/ /g' |\
+ sed -e 's/<foreign language=[a-zA-Z]*>/ /g' |\
+ sed -e 's/<\/foreign>/ /g' |\
+ perl -pe 's/<[^>]+>/ /g' |\
+ sed -e 's/\[NS\]//g' |\
+ sed -e 's/\[ns\]//g' |\
+ sed -e 's/<noise>\(.\+\)<\/noise>/\1/g' |\
+ sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\
+ perl local/mandarin_text_normalize.pl | \
+ python local/mandarin_segment.py > $txtdir/text || exit 1;
+
+paste $txtdir/allid.tmp $txtdir/text | sed 's: $::' | awk '{if (NF>5) {print
+$0}}' > $txtdir/all_1.tmp
+
+awk '{print $3}' $txtdir/all_1.tmp > $txtdir/uttid
+cut -d " " -f6- $txtdir/all_1.tmp > $txtdir/text
+
+awk '{$1="";print $0}' $txtdir/all_1.tmp | sed 's:^ ::' > $txtdir/../all
+
+cat $txtdir/utt2spk.tmp | awk 'NR==FNR{a[$1];next} $1 in a{print $0}' $txtdir/uttid - |\
+ sort -u > $txtdir/../utt2spk
+cat $txtdir/map.tmp | awk 'NR==FNR{a[$1];next} $2 in a{print $0}' $txtdir/uttid -|\
+ sort -u > $txtdir/../map
+
+sort -c $txtdir/../utt2spk
+
+utils/utt2spk_to_spk2utt.pl $txtdir/../utt2spk | sort -u > $txtdir/../spk2utt
+
+echo "Gale data prep text succeeded !"
diff --git a/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S04 b/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S04
new file mode 100644
index 00000000000..92f69180735
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S04
@@ -0,0 +1,7 @@
+CCTV4_ACROSSSTRAIT_CMN_20070108_073033
+PHOENIX_NEWSLINE_CMN_20070101_114800
+CCTV4_TDYFOCUS_CMN_20070111_082801
+CCTV2_ECONOMYANDLAW_CMN_20070126_203005
+PHOENIX_BEHINDHL_CMN_20061004_052800
+PHOENIX_NEWSHACK_CMN_20060923_212301
+PHOENIX_NEWSLINE_CMN_20070102_114800
diff --git a/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S08 b/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S08
new file mode 100644
index 00000000000..75868edcf85
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_dev/test.LDC2013S08
@@ -0,0 +1,7 @@
+CCTV4_DAILYNEWS_CMN_20061023_135801
+CCTV4_DAILYNEWS_CMN_20060923_135800
+PHOENIX_PHNXWRLD_CMN_20070101_111800
+CCTV4_NEWS3_CMN_20060921_085800
+CCTV7_MILITARYNEWS1_CMN_20070102_193006
+PHOENIX_PHNXWRLD_CMN_20061024_112500
+CCTV7_MILITARYNEWS1_CMN_20070113_193011
diff --git a/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03 b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03
new file mode 100644
index 00000000000..f7506dcb290
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03
@@ -0,0 +1,109 @@
+BEIJING_TWOWAYLANES_CMN_20080322_130507
+BEIJING_TWOWAYLANES_CMN_20080412_130002
+CCTV1_LEGALREPORT_CMN_20080311_123601
+CCTV1_LEGALREPORT_CMN_20080312_123601
+CCTV1_LEGALREPORT_CMN_20080319_123601
+CCTV1_LEGALREPORT_CMN_20080320_123601
+CCTV1_LEGALREPORT_CMN_20080321_123602
+CCTV1_LEGALREPORT_CMN_20080323_123601
+CCTV1_LEGALREPORT_CMN_20080324_123601
+CCTV1_LEGALREPORT_CMN_20080325_123601
+CCTV1_LEGALREPORT_CMN_20080326_123601
+CCTV1_LEGALREPORT_CMN_20080327_123801
+CCTV1_LEGALREPORT_CMN_20080328_123802
+CCTV1_LEGALREPORT_CMN_20080329_123802
+CCTV1_LEGALREPORT_CMN_20080330_123801
+CCTV1_LEGALREPORT_CMN_20080407_123801
+CCTV1_LEGALREPORT_CMN_20080408_123801
+CCTV1_LEGALREPORT_CMN_20080410_123801
+CCTV1_LEGALREPORT_CMN_20080422_123801
+CCTV2_ACROSSSTRAIT_CMN_20080312_073000
+CCTV2_BUSINESSHOUR_CMN_20080326_220802
+CCTV2_DIALOG_CMN_20080309_222803
+CCTV2_DIALOG_CMN_20080316_214834
+CCTV2_DIALOG_CMN_20080323_220801
+CCTV2_DIALOG_CMN_20080330_220803
+CCTV2_DIALOG_CMN_20080413_220801
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802
+CCTV2_ECONOMYANDLAW_CMN_20080322_202802
+CCTV2_ECONOMYANDLAW_CMN_20080324_202802
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802
+CCTV2_ECONOMYANDLAW_CMN_20080326_203035
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815
+CCTV2_ECONOMYANDLAW_CMN_20080401_202802
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000
+CCTV4_ACROSSSTRAIT_CMN_20080322_073002
+CCTV4_ACROSSSTRAIT_CMN_20080323_073002
+CCTV4_ACROSSSTRAIT_CMN_20080324_073002
+CCTV4_ACROSSSTRAIT_CMN_20080325_073002
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002
+CCTV4_ACROSSSTRAIT_CMN_20080328_073002
+CCTV4_ACROSSSTRAIT_CMN_20080329_073002
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002
+CCTV4_ACROSSSTRAIT_CMN_20080402_073002
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002
+CCTV4_ACROSSSTRAIT_CMN_20080425_073002
+CCTV4_ACROSSSTRAIT_CMN_20080426_073002
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201
+CCTVNEWS_PEOPLEINNEWS_CMN_20080324_202401
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701
+CCTVNEWS_PEOPLEINNEWS_CMN_20080422_202701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701
+HUBEI_COMMUNICATE_CMN_20080330_230009
+VOA_FOCUSDIALOGUE_CMN_20080405_210500
+VOA_FOCUSDIALOGUE_CMN_20080406_160500
+VOA_FOCUSDIALOGUE_CMN_20080414_160500
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500
+VOA_LISTENERSHOTLINE_CMN_20080403_223000
+VOA_LISTENERSHOTLINE_CMN_20080404_223000
+VOA_LISTENERSHOTLINE_CMN_20080412_223000
+VOA_LISTENERSHOTLINE_CMN_20080418_223000
+VOA_LISTENERSHOTLINE_CMN_20080423_223000
+VOA_STRAITSTALK_CMN_20080407_210500
+VOA_STRAITSTALK_CMN_20080414_210500
diff --git a/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03.segment b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03.segment
new file mode 100644
index 00000000000..3ef21919d23
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2016S03.segment
@@ -0,0 +1,1504 @@
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 175.122 186.816
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 268.681 274.885
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 268.681 274.885
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 316.040 323.775
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 333.528 336.982
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 340.838 346.705
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 387.065 393.495
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 424.507 427.475
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 509.798 522.713
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 834.360 842.314
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2419.176 2425.717
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2790.157 2802.478
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2887.429 2895.022
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2895.022 2896.194
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 2923.216 2926.875
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 3017.943 3020.333
+CCTV2_BUSINESSHOUR_CMN_20080326_220802 3095.726 3098.319
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 501.872 516.874
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 550.503 582.685
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 709.541 718.738
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 823.793 833.518
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 976.397 1003.134
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1048.942 1063.660
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1082.157 1092.840
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1092.840 1113.463
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1156.777 1168.181
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1221.170 1239.972
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1399.487 1439.237
+VOA_LISTENERSHOTLINE_CMN_20080403_223000 1657.955 1677.021
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 356.875 366.830
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 436.514 447.247
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 741.081 756.331
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 1015.560 1024.623
+CCTV4_ACROSSSTRAIT_CMN_20080321_073000 1024.623 1027.060
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 184.594 188.331
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 243.090 252.093
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 452.760 464.257
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 521.680 534.700
+CCTV2_ECONOMYANDLAW_CMN_20080415_202815 870.502 881.245
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 436.477 450.154
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 521.177 529.219
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 592.976 609.487
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 609.487 618.415
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 624.097 632.426
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 1294.040 1298.243
+CCTV4_ACROSSSTRAIT_CMN_20080423_073002 1446.220 1450.297
+CCTV1_LEGALREPORT_CMN_20080329_123802 209.918 221.517
+CCTV1_LEGALREPORT_CMN_20080329_123802 331.055 344.508
+CCTV1_LEGALREPORT_CMN_20080329_123802 363.914 371.628
+CCTV1_LEGALREPORT_CMN_20080329_123802 385.517 398.432
+CCTV1_LEGALREPORT_CMN_20080329_123802 605.849 611.269
+CCTV1_LEGALREPORT_CMN_20080329_123802 667.416 692.940
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 322.505 335.743
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 371.421 386.885
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 386.885 396.340
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 460.052 478.842
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 500.649 504.018
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 726.393 737.829
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 797.180 806.942
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 806.942 819.195
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 835.576 849.122
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 853.810 862.075
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 920.349 932.207
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 932.207 949.097
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 932.207 949.097
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1224.115 1232.158
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1318.505 1336.569
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1336.569 1349.707
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1349.707 1358.084
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1414.359 1418.694
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1443.803 1452.569
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1452.569 1470.357
+VOA_FOCUSDIALOGUE_CMN_20080406_160500 1507.575 1518.723
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802 230.150 244.815
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802 968.564 976.612
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802 1027.831 1040.143
+CCTV2_ECONOMYANDLAW_CMN_20080328_202802 1373.596 1385.986
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 1213.176 1244.700
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 1350.627 1370.834
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 1479.523 1491.229
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 1818.545 1837.272
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2073.163 2088.414
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2225.131 2256.343
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2431.393 2452.251
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2569.068 2590.218
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2617.970 2633.241
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2691.662 2716.147
+CCTVNEWS_TELLITLIKEITIS_CMN_20080324_110701 2810.594 2828.706
+CCTV1_LEGALREPORT_CMN_20080326_123601 370.283 378.662
+CCTV1_LEGALREPORT_CMN_20080326_123601 378.662 385.080
+CCTV1_LEGALREPORT_CMN_20080326_123601 525.547 536.532
+CCTV1_LEGALREPORT_CMN_20080326_123601 683.887 691.825
+CCTV1_LEGALREPORT_CMN_20080326_123601 703.684 712.489
+CCTV1_LEGALREPORT_CMN_20080326_123601 712.489 714.363
+CCTV1_LEGALREPORT_CMN_20080326_123601 714.363 719.825
+CCTV1_LEGALREPORT_CMN_20080326_123601 1094.555 1102.118
+CCTV1_LEGALREPORT_CMN_20080325_123601 348.364 354.020
+CCTV1_LEGALREPORT_CMN_20080325_123601 381.976 386.163
+CCTV1_LEGALREPORT_CMN_20080325_123601 386.163 403.078
+CCTV1_LEGALREPORT_CMN_20080325_123601 519.053 529.472
+CCTV1_LEGALREPORT_CMN_20080325_123601 706.516 711.306
+CCTV1_LEGALREPORT_CMN_20080325_123601 730.473 743.987
+CCTV1_LEGALREPORT_CMN_20080325_123601 813.758 824.057
+CCTV1_LEGALREPORT_CMN_20080325_123601 896.534 907.227
+CCTV1_LEGALREPORT_CMN_20080325_123601 961.640 974.947
+CCTV1_LEGALREPORT_CMN_20080325_123601 1224.469 1237.017
+CCTV1_LEGALREPORT_CMN_20080330_123801 177.772 186.515
+CCTV1_LEGALREPORT_CMN_20080330_123801 388.078 394.066
+CCTV1_LEGALREPORT_CMN_20080330_123801 676.982 691.620
+CCTV1_LEGALREPORT_CMN_20080330_123801 676.982 691.620
+CCTV1_LEGALREPORT_CMN_20080330_123801 691.620 701.021
+CCTV1_LEGALREPORT_CMN_20080330_123801 709.371 729.012
+CCTV1_LEGALREPORT_CMN_20080330_123801 786.881 807.597
+CCTV1_LEGALREPORT_CMN_20080330_123801 858.310 871.889
+CCTV1_LEGALREPORT_CMN_20080330_123801 958.857 965.929
+CCTV1_LEGALREPORT_CMN_20080330_123801 965.929 974.725
+BEIJING_TWOWAYLANES_CMN_20080322_130507 189.791 200.072
+BEIJING_TWOWAYLANES_CMN_20080322_130507 515.508 517.368
+BEIJING_TWOWAYLANES_CMN_20080322_130507 848.607 853.279
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1284.486 1295.045
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1502.737 1506.305
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1545.449 1553.996
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1909.202 1913.634
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1932.385 1933.977
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1950.594 1955.406
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1977.556 1988.722
+BEIJING_TWOWAYLANES_CMN_20080322_130507 1993.191 2004.354
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2004.354 2008.436
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2028.425 2045.672
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2045.672 2050.609
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2061.847 2069.863
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2069.863 2076.926
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2076.926 2084.004
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2084.004 2091.457
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2118.061 2127.717
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2310.792 2315.495
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2337.853 2340.386
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2418.575 2424.775
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2433.823 2451.474
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2509.194 2515.251
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2596.556 2600.540
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2818.236 2827.875
+BEIJING_TWOWAYLANES_CMN_20080322_130507 2830.328 2833.359
+CCTV4_ACROSSSTRAIT_CMN_20080325_073002 1303.243 1313.932
+CCTV4_ACROSSSTRAIT_CMN_20080325_073002 1313.932 1327.431
+CCTV4_ACROSSSTRAIT_CMN_20080325_073002 1370.507 1384.105
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002 476.312 483.187
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002 1151.028 1157.778
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002 1563.053 1575.412
+CCTV4_ACROSSSTRAIT_CMN_20080327_073002 1613.240 1615.584
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 390.532 406.190
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 1452.594 1465.941
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 1736.290 1745.379
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 1745.379 1753.239
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 1914.223 1939.397
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2022.052 2043.373
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2043.373 2059.456
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2290.993 2312.234
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2326.150 2342.674
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2393.123 2409.854
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2409.854 2433.531
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2433.531 2446.850
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2446.850 2464.390
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2464.390 2483.911
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2464.390 2483.911
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2483.911 2492.621
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2492.621 2521.981
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2599.502 2617.167
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2736.103 2763.048
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2875.835 2905.697
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2905.697 2922.428
+VOA_ISSUESANDOPINIONS_CMN_20080401_210500 2922.428 2941.697
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1268.022 1292.130
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1521.012 1531.695
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1734.963 1743.161
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1791.739 1808.611
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1926.230 1950.898
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1977.120 1986.478
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 1986.478 2005.264
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2600.275 2619.773
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2663.954 2685.162
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2685.162 2697.102
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2819.708 2836.259
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 2849.786 2859.241
+VOA_ISSUESANDOPINIONS_CMN_20080402_210500 3287.512 3301.847
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 451.597 464.083
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 884.562 896.566
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 954.839 965.200
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 1118.568 1133.694
+CCTV4_ACROSSSTRAIT_CMN_20080411_073002 1562.402 1578.469
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 242.313 251.167
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 374.326 383.123
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 849.574 865.909
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 985.771 1002.684
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 1510.644 1520.045
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 2023.706 2044.991
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 2438.502 2447.484
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 2532.259 2562.747
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 2858.064 2868.390
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 3622.217 3658.111
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 3905.538 3913.397
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4583.963 4593.681
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4716.259 4730.538
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4773.794 4792.400
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4792.400 4802.263
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 4848.270 4866.075
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5012.069 5021.111
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5402.471 5420.594
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5608.358 5622.041
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5622.041 5634.466
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5684.122 5690.419
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5845.904 5854.806
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 5923.051 5938.561
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6253.688 6269.497
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6540.433 6562.602
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6633.971 6647.208
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6735.663 6749.122
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6828.984 6850.257
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6850.257 6864.801
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6864.801 6881.641
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6935.763 6958.069
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 6958.069 6979.206
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7121.465 7136.116
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7316.780 7336.376
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7336.376 7351.460
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7362.747 7375.791
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7454.403 7470.365
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7470.365 7482.534
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7482.534 7492.815
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7545.048 7552.965
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7817.409 7828.917
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_083701 7817.409 7828.917
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 813.841 823.998
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1110.610 1130.382
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1342.236 1347.360
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1506.751 1512.111
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1600.563 1605.783
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1631.606 1633.496
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1636.120 1638.370
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1722.302 1724.240
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 1762.947 1772.695
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 2180.772 2181.803
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 3018.694 3024.146
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 3049.587 3056.446
+VOA_ISSUESANDOPINIONS_CMN_20080410_210500 3342.701 3347.139
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 112.562 118.937
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 396.743 407.196
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 455.434 456.887
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 690.132 694.366
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 716.672 745.827
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080310_122702 1791.892 1802.500
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002 261.849 269.722
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002 269.722 275.255
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002 388.273 403.194
+CCTV4_ACROSSSTRAIT_CMN_20080331_073002 1328.176 1333.800
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 386.764 390.763
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 1219.366 1224.765
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 1700.658 1706.846
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 1706.846 1710.734
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 1995.640 2006.396
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2013.664 2021.148
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2048.784 2051.480
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2055.567 2064.911
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2127.521 2130.131
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2155.458 2157.430
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2167.002 2169.581
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2177.838 2180.921
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2244.506 2248.865
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2258.896 2261.897
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2320.942 2324.067
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2524.349 2533.138
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2570.895 2580.961
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2670.623 2676.713
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2715.575 2723.482
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2723.482 2735.437
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2744.936 2763.378
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2770.861 2790.642
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2790.642 2801.001
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 2958.823 2973.141
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 3145.175 3149.330
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4320.758 4328.274
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4331.322 4338.416
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4425.932 4434.973
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4434.973 4443.240
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 4555.869 4564.317
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 5923.558 5930.710
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 5935.203 5944.050
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 5978.989 5990.241
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 6005.959 6016.216
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 6386.011 6394.027
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 6906.547 6930.592
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 7682.765 7695.852
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 7746.172 7765.267
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 8117.121 8135.107
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 8139.571 8143.124
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 8870.212 8913.647
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 9171.797 9181.172
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 9587.497 9588.599
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 9947.700 9966.294
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 10137.827 10144.843
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 10224.900 10234.801
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 10506.088 10524.616
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11102.139 11117.127
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11166.248 11185.279
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11478.559 11484.372
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11674.677 11677.802
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11791.333 11811.611
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 11913.901 11927.976
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 12043.723 12053.566
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080306_122702 12053.566 12072.666
+CCTVNEWS_PEOPLEINNEWS_CMN_20080422_202701 1545.087 1576.993
+CCTV4_ACROSSSTRAIT_CMN_20080402_073002 195.748 214.724
+CCTV4_ACROSSSTRAIT_CMN_20080402_073002 1316.568 1322.112
+CCTV4_ACROSSSTRAIT_CMN_20080402_073002 1434.190 1437.221
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002 289.797 295.688
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002 333.937 340.312
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002 729.948 739.229
+CCTV4_ACROSSSTRAIT_CMN_20080326_073002 818.680 828.212
+CCTV2_ECONOMYANDLAW_CMN_20080326_203035 678.014 696.608
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 193.294 214.275
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 1171.570 1192.304
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 1217.656 1225.168
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 1284.391 1300.479
+CCTV4_ACROSSSTRAIT_CMN_20080320_073000 1421.296 1434.660
+VOA_LISTENERSHOTLINE_CMN_20080412_223000 1373.501 1382.689
+VOA_LISTENERSHOTLINE_CMN_20080412_223000 1422.296 1436.219
+VOA_LISTENERSHOTLINE_CMN_20080412_223000 1700.244 1707.820
+CCTV1_LEGALREPORT_CMN_20080320_123601 431.623 444.016
+CCTV1_LEGALREPORT_CMN_20080320_123601 453.717 460.124
+CCTV1_LEGALREPORT_CMN_20080320_123601 558.567 573.807
+CCTV1_LEGALREPORT_CMN_20080320_123601 616.918 619.132
+CCTV1_LEGALREPORT_CMN_20080320_123601 619.132 629.042
+CCTV1_LEGALREPORT_CMN_20080320_123601 756.415 765.851
+CCTV1_LEGALREPORT_CMN_20080320_123601 1159.064 1170.441
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 161.218 177.642
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 217.235 228.614
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 234.552 243.484
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 281.512 297.070
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 445.770 455.248
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 463.595 495.687
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 524.817 531.863
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 806.934 819.015
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 945.741 959.398
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 1094.420 1102.763
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 1182.793 1201.808
+CCTV2_ECONOMYANDLAW_CMN_20080423_202802 1344.563 1358.234
+CCTV4_ACROSSSTRAIT_CMN_20080329_073002 109.647 123.693
+CCTV4_ACROSSSTRAIT_CMN_20080329_073002 305.320 315.380
+CCTV4_ACROSSSTRAIT_CMN_20080329_073002 505.101 518.634
+CCTV1_LEGALREPORT_CMN_20080328_123802 237.445 258.287
+CCTV1_LEGALREPORT_CMN_20080328_123802 289.455 300.247
+CCTV1_LEGALREPORT_CMN_20080328_123802 549.949 570.632
+CCTV1_LEGALREPORT_CMN_20080328_123802 638.909 652.586
+CCTV1_LEGALREPORT_CMN_20080328_123802 899.675 905.777
+CCTV1_LEGALREPORT_CMN_20080328_123802 1016.378 1036.368
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 260.556 269.589
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 269.589 280.849
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 1034.323 1042.029
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 1127.101 1140.004
+CCTV2_ECONOMYANDLAW_CMN_20080428_202802 1156.362 1176.296
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 356.523 367.797
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 632.675 640.199
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 652.924 664.640
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 998.153 1006.076
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 1367.896 1388.299
+CCTV4_ACROSSSTRAIT_CMN_20080403_073002 1606.557 1613.244
+CCTV2_DIALOG_CMN_20080323_220801 1321.827 1341.319
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 391.159 403.915
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 777.048 797.124
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 833.582 855.372
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 1624.624 1649.703
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080318_212701 1649.703 1668.778
+VOA_STRAITSTALK_CMN_20080414_210500 207.242 215.715
+VOA_STRAITSTALK_CMN_20080414_210500 262.527 266.868
+VOA_STRAITSTALK_CMN_20080414_210500 494.710 503.125
+VOA_STRAITSTALK_CMN_20080414_210500 511.748 523.276
+VOA_STRAITSTALK_CMN_20080414_210500 525.401 533.066
+VOA_STRAITSTALK_CMN_20080414_210500 606.323 613.539
+VOA_STRAITSTALK_CMN_20080414_210500 613.539 625.240
+VOA_STRAITSTALK_CMN_20080414_210500 1078.325 1096.189
+VOA_STRAITSTALK_CMN_20080414_210500 1353.950 1366.936
+VOA_STRAITSTALK_CMN_20080414_210500 1366.936 1380.502
+VOA_STRAITSTALK_CMN_20080414_210500 1380.502 1392.322
+VOA_STRAITSTALK_CMN_20080414_210500 1392.322 1400.851
+VOA_STRAITSTALK_CMN_20080414_210500 1400.851 1418.149
+VOA_STRAITSTALK_CMN_20080414_210500 1418.149 1426.755
+VOA_STRAITSTALK_CMN_20080414_210500 1440.688 1442.814
+VOA_STRAITSTALK_CMN_20080414_210500 1442.814 1463.053
+VOA_STRAITSTALK_CMN_20080414_210500 1475.375 1496.954
+VOA_STRAITSTALK_CMN_20080414_210500 1541.927 1566.969
+VOA_STRAITSTALK_CMN_20080414_210500 1839.847 1841.566
+VOA_STRAITSTALK_CMN_20080414_210500 1841.566 1868.241
+VOA_STRAITSTALK_CMN_20080414_210500 1945.631 1957.812
+VOA_STRAITSTALK_CMN_20080414_210500 2125.735 2146.839
+VOA_STRAITSTALK_CMN_20080414_210500 2732.199 2733.777
+VOA_STRAITSTALK_CMN_20080414_210500 2770.946 2782.647
+VOA_STRAITSTALK_CMN_20080414_210500 2863.110 2885.080
+VOA_STRAITSTALK_CMN_20080414_210500 2903.230 2923.793
+VOA_STRAITSTALK_CMN_20080414_210500 3234.231 3256.558
+VOA_STRAITSTALK_CMN_20080414_210500 3309.376 3315.509
+VOA_STRAITSTALK_CMN_20080414_210500 3315.509 3324.023
+VOA_STRAITSTALK_CMN_20080414_210500 3324.023 3329.514
+VOA_STRAITSTALK_CMN_20080414_210500 3337.754 3351.358
+CCTV2_ECONOMYANDLAW_CMN_20080324_202802 993.159 1004.075
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 100.038 110.008
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 110.008 121.148
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 302.278 311.481
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 317.325 326.842
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 326.842 344.304
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 532.746 540.916
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 804.269 815.848
+CCTV2_ECONOMYANDLAW_CMN_20080321_202802 1174.986 1187.501
+CCTV4_ACROSSSTRAIT_CMN_20080328_073002 606.003 612.222
+CCTV4_ACROSSSTRAIT_CMN_20080328_073002 1317.238 1323.004
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 434.957 442.007
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 908.187 931.355
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 1003.670 1027.287
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 1761.105 1763.366
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 1794.368 1819.904
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 2096.000 2111.144
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 2281.960 2299.738
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 2675.919 2678.237
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 2990.851 3018.085
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3075.112 3086.675
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3586.194 3604.157
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3636.939 3650.526
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3675.993 3698.023
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 3832.037 3849.222
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4050.684 4086.249
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4121.887 4143.019
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4167.714 4190.852
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4409.823 4413.900
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4454.639 4475.409
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4501.449 4518.812
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4561.903 4601.809
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4670.951 4678.490
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 4724.543 4742.142
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 5316.415 5337.715
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 5564.584 5568.411
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 5594.680 5628.696
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6168.428 6193.103
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6219.478 6251.299
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6330.311 6362.012
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6362.012 6379.281
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6420.252 6439.408
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6502.285 6508.326
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6567.228 6589.508
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6589.508 6613.843
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6769.366 6798.919
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 6954.252 6956.138
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 7261.811 7277.763
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 7513.530 7529.476
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 7946.686 7963.716
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 7978.291 8008.271
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 8060.732 8063.906
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080311_122701 8363.042 8387.243
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 580.303 581.475
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 639.793 644.230
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 900.720 908.033
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1089.250 1090.500
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1320.160 1334.416
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1455.962 1460.055
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1528.445 1540.915
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1696.343 1704.232
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1880.324 1893.434
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 1893.434 1895.027
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2014.865 2028.896
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2266.555 2284.280
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2296.756 2312.357
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2323.628 2329.175
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080308_122701 2364.044 2365.763
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 934.479 951.520
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1374.975 1398.172
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1436.432 1459.595
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1489.211 1503.057
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1503.057 1511.084
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1511.084 1528.239
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1528.239 1544.999
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1584.729 1606.307
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1730.993 1745.976
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 1745.976 1760.489
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 2677.717 2699.815
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 2779.102 2792.660
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 2800.172 2821.770
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 2840.379 2854.828
+VOA_ISSUESANDOPINIONS_CMN_20080403_210500 3262.386 3277.658
+CCTV2_DIALOG_CMN_20080330_220803 329.331 337.048
+CCTV2_DIALOG_CMN_20080330_220803 1340.264 1354.585
+CCTV2_DIALOG_CMN_20080330_220803 2037.464 2060.272
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 39.211 55.912
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 154.736 163.311
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 229.580 233.019
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 305.849 316.342
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 334.495 341.214
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 489.437 493.193
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 493.193 498.482
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 546.570 560.759
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 690.803 714.930
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 885.507 914.481
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 914.481 921.876
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 957.556 976.807
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1023.889 1031.400
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1046.247 1055.460
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1462.323 1466.401
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1514.674 1519.347
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1519.347 1534.440
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1569.442 1575.180
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1575.180 1586.358
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1592.419 1601.278
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1603.309 1617.558
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1652.855 1657.838
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1706.099 1715.443
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1832.383 1849.260
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1943.812 1961.752
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 1987.723 1997.474
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2192.136 2201.472
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2201.472 2214.638
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2265.606 2282.279
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2341.502 2351.265
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2351.265 2372.341
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2913.550 2930.035
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2913.550 2930.035
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2940.160 2945.847
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 2993.306 3016.286
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 3078.481 3085.172
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_105601 3240.077 3248.175
+CCTVNEWS_PEOPLEINNEWS_CMN_20080324_202401 449.873 475.376
+CCTVNEWS_PEOPLEINNEWS_CMN_20080324_202401 475.376 482.915
+CCTVNEWS_PEOPLEINNEWS_CMN_20080324_202401 1795.081 1815.246
+CCTV1_LEGALREPORT_CMN_20080327_123801 212.113 225.544
+CCTV1_LEGALREPORT_CMN_20080327_123801 391.267 397.627
+CCTV1_LEGALREPORT_CMN_20080327_123801 528.989 532.005
+CCTV1_LEGALREPORT_CMN_20080327_123801 633.707 643.174
+CCTV1_LEGALREPORT_CMN_20080327_123801 679.053 691.652
+CCTV1_LEGALREPORT_CMN_20080327_123801 859.370 866.885
+CCTV1_LEGALREPORT_CMN_20080327_123801 866.885 869.120
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 510.751 520.993
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 610.606 618.001
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 656.414 658.449
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 963.038 966.990
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 971.865 980.608
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1123.676 1142.150
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1298.431 1313.459
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1471.075 1483.669
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1588.058 1612.689
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1681.798 1689.103
+CCTVNEWS_PEOPLEINNEWS_CMN_20080325_202401 1948.055 1957.682
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 871.846 881.393
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1152.716 1162.886
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1211.639 1212.483
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1295.780 1309.767
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1319.256 1326.348
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1326.348 1328.036
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1381.529 1396.124
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1430.419 1444.154
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1461.602 1474.280
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1504.783 1516.135
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1581.733 1593.486
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1597.385 1601.932
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1607.870 1611.136
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1659.150 1668.416
+VOA_FOCUSDIALOGUE_CMN_20080405_210500 1731.216 1743.893
+CCTV4_ACROSSSTRAIT_CMN_20080324_073002 324.632 333.882
+CCTV4_ACROSSSTRAIT_CMN_20080324_073002 1295.605 1303.229
+CCTV4_ACROSSSTRAIT_CMN_20080324_073002 1493.629 1499.050
+CCTV4_ACROSSSTRAIT_CMN_20080323_073002 339.387 352.600
+CCTV4_ACROSSSTRAIT_CMN_20080323_073002 675.952 683.920
+CCTV4_ACROSSSTRAIT_CMN_20080323_073002 1354.442 1365.441
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 358.704 388.240
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 689.092 709.688
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 883.248 891.546
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 928.874 939.373
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 980.662 995.710
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1154.951 1168.191
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1250.547 1255.864
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1275.017 1296.901
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1446.622 1462.387
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1465.590 1479.606
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1562.994 1578.157
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1578.157 1593.026
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1665.124 1685.334
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1775.037 1791.295
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1869.655 1888.414
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1930.774 1941.211
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1966.507 1988.760
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 1988.760 1998.510
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2062.122 2075.225
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2364.435 2401.100
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2650.012 2659.744
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2695.642 2709.517
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2845.166 2859.008
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 2845.166 2859.008
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3000.085 3017.908
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3129.338 3139.494
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3150.374 3157.250
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3207.737 3222.463
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3207.737 3222.463
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3283.250 3286.850
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3785.698 3805.119
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3805.119 3807.572
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3826.777 3842.763
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3855.673 3868.749
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3868.749 3881.129
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3911.574 3929.486
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3911.574 3929.486
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 3929.486 3937.799
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4000.645 4017.610
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4000.645 4017.610
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4115.458 4121.444
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4126.843 4129.546
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4148.038 4163.709
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4317.698 4332.466
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4338.082 4356.603
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4363.355 4375.331
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4718.604 4727.144
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4756.968 4766.803
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 4828.812 4841.125
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5086.733 5092.952
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5204.183 5241.880
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5271.774 5276.930
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5331.146 5345.381
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5385.226 5396.328
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5513.497 5521.697
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5634.856 5653.647
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5720.917 5730.558
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5736.001 5745.114
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5825.338 5829.379
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5901.056 5906.089
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 5989.401 5996.148
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6050.515 6059.616
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6082.801 6090.678
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6217.539 6233.863
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6233.863 6238.812
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6238.812 6244.592
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6244.592 6254.992
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6254.992 6261.593
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6261.593 6267.408
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6267.408 6274.281
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6274.281 6283.473
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6283.473 6290.991
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6290.991 6302.665
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6302.665 6307.238
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6338.240 6350.432
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 6473.181 6478.790
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7041.786 7048.548
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7129.548 7147.933
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7343.837 7351.456
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7623.514 7639.319
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7663.208 7665.166
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7705.454 7712.604
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7733.204 7739.282
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7739.282 7749.278
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7749.278 7758.568
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7758.568 7764.571
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7767.249 7785.326
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 7810.096 7822.536
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 8180.257 8204.346
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 8222.251 8240.932
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080307_122702 8308.543 8314.292
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 461.577 463.280
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 496.855 501.899
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 734.845 740.452
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 740.452 752.619
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 1541.085 1545.725
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 1561.275 1569.775
+CCTVNEWS_PEOPLEINNEWS_CMN_20080331_202701 1652.401 1658.635
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 148.919 162.499
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 179.492 185.534
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 185.534 192.136
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 192.136 203.015
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 226.707 235.340
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 235.340 241.668
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 268.434 273.787
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 325.047 335.829
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 462.554 475.640
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 485.007 492.730
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 542.532 568.781
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 576.362 589.995
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 840.064 851.230
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 851.230 857.978
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 857.978 864.943
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 870.879 881.875
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 965.317 977.338
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1004.188 1015.549
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1331.932 1341.729
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1389.462 1395.305
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1414.874 1420.906
+CCTV2_ECONOMYANDLAW_CMN_20080325_202802 1450.275 1457.837
+CCTV1_LEGALREPORT_CMN_20080422_123801 422.064 425.148
+CCTV1_LEGALREPORT_CMN_20080422_123801 882.276 906.163
+CCTV1_LEGALREPORT_CMN_20080422_123801 1048.379 1069.002
+CCTV1_LEGALREPORT_CMN_20080422_123801 1078.158 1098.167
+CCTV2_DIALOG_CMN_20080309_222803 523.446 544.410
+CCTV2_DIALOG_CMN_20080309_222803 544.410 563.206
+CCTV2_DIALOG_CMN_20080309_222803 584.423 601.345
+CCTV2_DIALOG_CMN_20080309_222803 888.358 902.456
+CCTV2_DIALOG_CMN_20080309_222803 1379.715 1392.733
+CCTV2_DIALOG_CMN_20080309_222803 1443.541 1476.184
+CCTV2_DIALOG_CMN_20080309_222803 1679.190 1712.581
+CCTV2_DIALOG_CMN_20080309_222803 1992.685 2009.277
+CCTV2_DIALOG_CMN_20080309_222803 2464.131 2483.586
+CCTV2_DIALOG_CMN_20080309_222803 2524.525 2558.650
+CCTV2_DIALOG_CMN_20080309_222803 2588.577 2608.491
+CCTV2_DIALOG_CMN_20080309_222803 2836.204 2852.105
+CCTV2_DIALOG_CMN_20080309_222803 2852.105 2881.348
+CCTV2_DIALOG_CMN_20080309_222803 2938.325 2975.540
+CCTV2_DIALOG_CMN_20080309_222803 2975.540 2983.827
+CCTV2_DIALOG_CMN_20080309_222803 3051.037 3071.396
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 657.445 661.414
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 1041.427 1049.925
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 1069.942 1075.973
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 1716.267 1726.892
+CCTVNEWS_PEOPLEINNEWS_CMN_20080327_202701 1766.782 1773.907
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 212.260 243.932
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 547.222 565.187
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 641.691 645.517
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 659.667 674.024
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 1838.611 1857.279
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 1879.746 1883.782
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 1960.658 1980.371
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2363.964 2389.355
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2389.355 2412.008
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2467.097 2481.788
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2547.292 2571.964
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2623.927 2639.370
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 2659.574 2685.916
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 5414.746 5441.790
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 8200.062 8218.732
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 8287.655 8305.972
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 8498.341 8512.467
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 8713.764 8728.212
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9167.094 9183.286
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9183.286 9199.448
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9199.448 9214.358
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9433.436 9454.735
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9481.236 9496.020
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9496.020 9498.619
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9535.504 9557.075
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9603.183 9612.738
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9665.241 9680.894
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9836.623 9865.334
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 9927.451 9959.428
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10325.673 10343.062
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10325.673 10343.062
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10509.403 10524.718
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10558.347 10593.337
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10622.808 10642.622
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10622.808 10642.622
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10642.622 10674.657
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 10968.320 10998.969
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 11360.345 11393.217
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 11411.325 11440.152
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 11448.280 11474.076
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 11690.929 11705.594
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 12129.332 12147.527
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_122701 12169.093 12201.839
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 330.583 337.026
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 345.789 353.633
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 353.633 371.526
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 390.851 411.783
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 485.641 498.839
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 498.839 524.213
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 528.095 545.345
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 528.095 545.345
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 812.006 831.431
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 847.094 854.429
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1096.875 1105.438
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1096.875 1105.438
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1134.545 1147.533
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1450.162 1460.877
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1660.093 1684.357
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 1877.724 1887.478
+CCTVNEWS_PEOPLEINNEWS_CMN_20080328_202701 2248.026 2254.432
+CCTV1_LEGALREPORT_CMN_20080324_123601 365.563 372.204
+CCTV1_LEGALREPORT_CMN_20080324_123601 1054.872 1061.662
+CCTV1_LEGALREPORT_CMN_20080324_123601 1294.882 1323.696
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002 267.346 292.051
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002 340.648 353.161
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002 964.927 973.614
+CCTV4_ACROSSSTRAIT_CMN_20080401_073002 1527.876 1543.970
+HUBEI_COMMUNICATE_CMN_20080330_230009 106.754 127.639
+HUBEI_COMMUNICATE_CMN_20080330_230009 510.440 526.802
+HUBEI_COMMUNICATE_CMN_20080330_230009 546.030 569.138
+HUBEI_COMMUNICATE_CMN_20080330_230009 598.702 622.698
+HUBEI_COMMUNICATE_CMN_20080330_230009 918.941 939.460
+HUBEI_COMMUNICATE_CMN_20080330_230009 1028.755 1046.414
+HUBEI_COMMUNICATE_CMN_20080330_230009 1046.414 1058.953
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 620.246 639.075
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 639.075 653.275
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 682.899 689.031
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 689.031 714.166
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 747.337 760.989
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1036.875 1053.519
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1429.272 1462.301
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1563.238 1570.890
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1613.387 1634.821
+VOA_LISTENERSHOTLINE_CMN_20080423_223000 1655.430 1683.376
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 176.872 189.801
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 296.455 302.169
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 391.798 412.470
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 577.881 589.846
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1035.900 1045.744
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1045.744 1050.229
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1088.227 1097.618
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1137.329 1147.414
+CCTV2_ECONOMYANDLAW_CMN_20080329_202815 1467.699 1490.511
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802 150.841 163.027
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802 184.162 198.770
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802 438.806 449.046
+CCTV2_ECONOMYANDLAW_CMN_20080422_202802 740.471 758.222
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1085.429 1102.523
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1250.244 1257.399
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1525.719 1532.060
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1562.654 1573.454
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 1587.438 1602.251
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2011.591 2014.060
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2131.905 2134.608
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2166.622 2170.341
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2279.746 2298.318
+CCTVNEWS_NEWSPROBE_CMN_20080303_221201 2362.475 2367.569
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 119.934 127.263
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 140.561 144.645
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 189.389 192.646
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 375.679 382.461
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 440.982 452.507
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 665.216 678.739
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 852.910 865.480
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 877.416 892.030
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 892.030 903.796
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1335.685 1355.715
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1450.033 1458.253
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1588.432 1606.172
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1606.172 1609.838
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1609.838 1616.153
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 1798.932 1808.603
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2089.485 2093.061
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2093.061 2109.322
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2194.211 2206.002
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2521.753 2525.707
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 2674.648 2684.234
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3169.278 3177.745
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3169.278 3177.745
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3199.498 3205.959
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3219.352 3233.277
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3240.981 3248.680
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3269.990 3278.023
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3278.023 3291.211
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3294.320 3298.587
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3445.988 3454.317
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3574.777 3583.610
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3646.704 3662.393
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3662.393 3667.501
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3662.393 3667.501
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3667.501 3673.861
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3686.654 3696.441
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3705.340 3710.605
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3719.716 3722.979
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3719.716 3722.979
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3754.156 3757.702
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3757.702 3763.549
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3757.702 3763.549
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3789.032 3807.102
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3807.102 3817.533
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 3959.937 3964.015
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4092.714 4100.559
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4102.061 4105.295
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4144.488 4153.911
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4202.960 4213.402
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4202.960 4213.402
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4263.955 4274.142
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4284.767 4300.948
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4309.355 4319.858
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4497.661 4512.302
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4534.477 4544.984
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4582.461 4596.896
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4610.165 4615.947
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4676.278 4688.855
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4696.355 4703.995
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4703.995 4713.315
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4713.315 4731.835
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4731.835 4738.225
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4738.225 4745.872
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4745.872 4754.991
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4761.834 4772.444
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4772.444 4776.303
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4776.303 4787.538
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4796.191 4807.547
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4819.579 4831.924
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4882.167 4904.612
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4913.109 4936.477
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4985.323 4990.719
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 4990.719 4996.335
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5112.626 5128.524
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5618.208 5626.100
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5705.375 5728.384
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5784.708 5795.738
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5795.738 5810.401
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5810.401 5823.610
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5845.642 5855.019
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5917.685 5922.733
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 5970.424 5974.713
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6103.472 6121.632
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6121.632 6147.005
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6239.148 6246.958
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6431.822 6452.078
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6646.314 6650.785
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6646.314 6650.785
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6667.789 6692.398
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6701.907 6718.850
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6726.942 6736.590
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6736.590 6747.732
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6762.223 6779.183
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6796.095 6806.688
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 6981.422 6988.998
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7201.297 7233.808
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7267.161 7277.896
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7284.124 7295.476
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7309.305 7326.401
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7344.303 7358.798
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_122701 7378.571 7386.441
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 282.541 311.145
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 449.138 459.997
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 494.593 514.625
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 707.915 731.377
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 1513.257 1525.570
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 1856.251 1857.173
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 1866.059 1868.634
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 2033.643 2036.409
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 2236.470 2255.264
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 3101.208 3115.669
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 3422.377 3441.161
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 3474.049 3495.959
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 3495.959 3512.231
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 4562.052 4569.830
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 4749.950 4760.827
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 6085.863 6097.073
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 8801.231 8807.016
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 9645.243 9650.423
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 10556.680 10572.742
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 10620.072 10633.936
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_083701 10633.936 10648.341
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 271.884 278.118
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 300.854 307.198
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 458.963 464.792
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 510.711 521.134
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 597.853 606.103
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 636.506 642.631
+CCTV2_ECONOMYANDLAW_CMN_20080410_202820 1076.912 1096.786
+CCTV2_DIALOG_CMN_20080413_220801 394.231 427.503
+CCTV2_DIALOG_CMN_20080413_220801 526.611 550.643
+CCTV2_DIALOG_CMN_20080413_220801 634.145 636.385
+CCTV2_DIALOG_CMN_20080413_220801 726.432 743.871
+CCTV2_DIALOG_CMN_20080413_220801 1596.005 1612.419
+CCTV2_DIALOG_CMN_20080413_220801 1856.240 1863.515
+CCTV2_DIALOG_CMN_20080413_220801 1863.515 1873.066
+CCTV2_DIALOG_CMN_20080413_220801 1876.354 1878.474
+CCTV2_DIALOG_CMN_20080413_220801 1892.994 1901.345
+CCTV2_DIALOG_CMN_20080413_220801 1892.994 1901.345
+CCTV2_DIALOG_CMN_20080413_220801 1943.532 1949.267
+CCTV2_DIALOG_CMN_20080413_220801 1985.331 1989.398
+CCTV2_DIALOG_CMN_20080413_220801 1995.382 2014.866
+CCTV2_DIALOG_CMN_20080413_220801 2102.298 2108.204
+CCTV2_DIALOG_CMN_20080413_220801 2314.775 2332.179
+CCTV2_DIALOG_CMN_20080413_220801 2999.507 3009.181
+CCTV2_DIALOG_CMN_20080413_220801 3030.684 3044.239
+CCTV2_DIALOG_CMN_20080413_220801 3030.684 3044.239
+CCTV2_DIALOG_CMN_20080413_220801 3054.797 3061.767
+CCTV2_DIALOG_CMN_20080413_220801 3054.797 3061.767
+CCTV2_DIALOG_CMN_20080413_220801 3084.319 3100.712
+CCTV2_DIALOG_CMN_20080413_220801 3100.712 3114.330
+CCTV2_DIALOG_CMN_20080413_220801 3100.712 3114.330
+CCTV1_LEGALREPORT_CMN_20080312_123601 898.991 907.458
+CCTV1_LEGALREPORT_CMN_20080312_123601 1273.836 1282.213
+CCTV1_LEGALREPORT_CMN_20080312_123601 1348.560 1360.570
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 734.245 753.784
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1144.712 1160.126
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1184.559 1186.684
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1461.929 1484.215
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1756.860 1770.844
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1770.844 1785.969
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1785.969 1805.792
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 1830.878 1843.282
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 2028.871 2051.359
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 2446.226 2461.879
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 2566.118 2582.370
+VOA_ISSUESANDOPINIONS_CMN_20080409_210500 3156.066 3164.719
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 319.367 328.179
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 420.445 444.684
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 475.750 489.683
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 489.683 511.155
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 520.999 536.855
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 714.464 722.912
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 843.133 869.528
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 869.528 896.901
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 946.012 962.184
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1055.517 1071.673
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1107.259 1130.214
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1186.197 1210.293
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1219.489 1227.890
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1227.890 1233.531
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1320.483 1323.619
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1344.737 1361.056
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1467.794 1470.281
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1693.338 1708.062
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1711.968 1740.837
+CCTVNEWS_PEOPLEINNEWS_CMN_20080421_202701 1921.061 1947.367
+CCTV4_ACROSSSTRAIT_CMN_20080425_073002 424.066 438.812
+CCTV4_ACROSSSTRAIT_CMN_20080425_073002 1308.407 1332.950
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 721.538 731.767
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 731.767 736.767
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1411.444 1419.835
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1860.402 1877.555
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1884.208 1884.849
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1981.351 1998.883
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 1998.883 2016.543
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2130.535 2143.553
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2300.049 2323.432
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2397.635 2410.859
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2441.316 2459.076
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2702.466 2735.265
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 2954.623 2980.818
+VOA_ISSUESANDOPINIONS_CMN_20080415_210500 3173.815 3194.406
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 372.148 391.957
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 458.578 459.831
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 549.374 564.519
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 573.314 581.489
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 709.526 711.619
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 709.526 711.619
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 800.846 807.307
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 807.307 819.933
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 819.933 826.527
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 836.339 841.324
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 854.105 863.283
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 863.283 878.428
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 878.428 883.431
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 896.017 898.600
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 909.740 916.519
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 983.479 985.273
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1368.295 1378.492
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1378.492 1394.545
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1401.809 1410.152
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1466.307 1481.028
+VOA_LISTENERSHOTLINE_CMN_20080404_223000 1637.819 1654.100
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 118.759 123.463
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 461.086 465.770
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 505.139 518.550
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 603.583 622.194
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 661.499 677.136
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 717.974 724.350
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1296.998 1302.202
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1402.782 1412.267
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1438.642 1458.299
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1871.944 1879.909
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1871.944 1879.909
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1929.924 1944.124
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1954.888 1968.183
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 1968.183 1979.980
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2078.712 2085.517
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2121.449 2141.315
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2141.315 2146.521
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2184.152 2194.384
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2243.583 2257.505
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2243.583 2257.505
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2307.717 2318.201
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2318.201 2333.791
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 2603.971 2615.633
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3010.665 3025.781
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3034.317 3040.625
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3112.937 3121.219
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3159.185 3180.651
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3180.651 3198.315
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3251.429 3256.671
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3366.509 3385.014
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3366.509 3385.014
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3437.281 3451.077
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3590.330 3612.073
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3717.095 3725.798
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3869.792 3877.934
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 3977.480 4003.700
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4136.474 4150.118
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4223.030 4245.253
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4586.006 4606.640
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4642.949 4658.521
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4873.998 4884.052
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4873.998 4884.052
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 4884.052 4893.666
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5078.411 5080.646
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5250.414 5270.384
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5324.836 5343.723
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5690.857 5701.040
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5775.280 5791.706
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5831.776 5837.448
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5878.440 5888.998
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5908.640 5922.303
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 5994.440 6012.444
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6033.989 6038.208
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6038.208 6046.152
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6463.797 6475.924
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6475.924 6481.079
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6536.481 6545.904
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6624.408 6633.608
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6633.608 6645.438
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6809.859 6821.217
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 6888.586 6898.022
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7173.669 7184.547
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7419.657 7434.242
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7448.758 7470.281
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7448.758 7470.281
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7470.281 7484.549
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7517.118 7525.783
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7561.369 7592.201
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7739.110 7755.344
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7904.540 7910.273
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080310_090602 7904.540 7910.273
+CCTV1_LEGALREPORT_CMN_20080407_123801 503.360 537.155
+CCTV1_LEGALREPORT_CMN_20080407_123801 626.944 641.090
+VOA_FOCUSDIALOGUE_CMN_20080414_160500 236.828 249.206
+VOA_FOCUSDIALOGUE_CMN_20080414_160500 270.339 279.214
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 497.209 502.787
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1237.886 1248.959
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1260.299 1265.690
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1437.291 1448.510
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1448.510 1457.197
+CCTV4_ACROSSSTRAIT_CMN_20080420_073002 1599.872 1606.003
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 162.148 166.196
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 448.073 449.292
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 642.519 644.832
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1225.349 1233.350
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1284.343 1289.686
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1459.978 1485.741
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1662.847 1665.911
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 1962.247 1974.435
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080307_122702 2322.207 2323.712
+CCTV4_ACROSSSTRAIT_CMN_20080426_073002 191.190 198.363
+CCTV4_ACROSSSTRAIT_CMN_20080426_073002 1147.443 1160.662
+CCTV1_LEGALREPORT_CMN_20080410_123801 307.221 316.518
+CCTV1_LEGALREPORT_CMN_20080410_123801 327.548 329.438
+CCTV1_LEGALREPORT_CMN_20080410_123801 1055.519 1060.114
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 123.220 128.033
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 268.970 282.579
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 1121.300 1128.300
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 1152.252 1153.737
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 1235.841 1246.200
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 1392.284 1396.143
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080306_122702 2285.846 2294.557
+CCTV1_LEGALREPORT_CMN_20080319_123601 633.814 646.627
+CCTV1_LEGALREPORT_CMN_20080319_123601 723.763 731.218
+CCTV1_LEGALREPORT_CMN_20080319_123601 833.512 843.482
+CCTV1_LEGALREPORT_CMN_20080319_123601 921.703 954.679
+CCTV1_LEGALREPORT_CMN_20080319_123601 1020.289 1035.552
+CCTV2_ECONOMYANDLAW_CMN_20080322_202802 392.841 404.325
+CCTV2_ECONOMYANDLAW_CMN_20080322_202802 1001.460 1014.883
+CCTV1_LEGALREPORT_CMN_20080323_123601 314.957 321.963
+CCTV1_LEGALREPORT_CMN_20080323_123601 407.000 414.969
+CCTV1_LEGALREPORT_CMN_20080323_123601 430.047 439.765
+CCTV1_LEGALREPORT_CMN_20080323_123601 527.075 533.701
+CCTV1_LEGALREPORT_CMN_20080323_123601 1057.525 1059.771
+CCTV1_LEGALREPORT_CMN_20080323_123601 1123.912 1135.762
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 136.367 143.410
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 287.991 310.200
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 616.468 632.029
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 958.975 971.798
+CCTV2_ECONOMYANDLAW_CMN_20080320_202800 1148.657 1159.251
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 391.785 397.550
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 457.656 467.209
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 562.053 583.450
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 599.589 601.086
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 744.461 747.874
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 1319.993 1329.117
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 2490.714 2501.539
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 2524.039 2544.126
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 2602.265 2619.834
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 2914.896 2934.487
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 3010.481 3014.903
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 3520.733 3542.487
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 3932.922 3933.650
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4130.133 4161.057
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4321.267 4345.098
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4753.824 4774.614
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4794.758 4796.159
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 4915.194 4919.101
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 5587.197 5638.012
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 6091.453 6101.226
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 7208.003 7235.345
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 7235.345 7272.083
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 7981.705 8010.668
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 9184.985 9208.172
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 10021.625 10022.718
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 10442.648 10467.546
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 10486.012 10501.515
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 10501.515 10531.964
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080309_083702 11579.117 11583.296
+CCTV1_LEGALREPORT_CMN_20080408_123801 346.889 354.124
+CCTV1_LEGALREPORT_CMN_20080408_123801 444.523 466.588
+CCTV1_LEGALREPORT_CMN_20080408_123801 466.588 474.707
+CCTV1_LEGALREPORT_CMN_20080408_123801 487.629 495.058
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 1245.650 1250.712
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 1659.799 1667.285
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 1702.301 1703.911
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 2349.141 2355.708
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 2349.141 2355.708
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080305_122702 2355.708 2359.271
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 299.907 317.860
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 405.438 413.282
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 481.287 487.840
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 621.940 637.105
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 816.038 818.179
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 1013.115 1018.171
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 1525.335 1529.163
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 1840.424 1844.314
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 2059.493 2063.040
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 2170.942 2181.869
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080304_123301 2405.366 2415.834
+BEIJING_TWOWAYLANES_CMN_20080412_130002 176.566 177.754
+BEIJING_TWOWAYLANES_CMN_20080412_130002 177.754 178.988
+BEIJING_TWOWAYLANES_CMN_20080412_130002 191.952 192.843
+BEIJING_TWOWAYLANES_CMN_20080412_130002 200.952 203.718
+BEIJING_TWOWAYLANES_CMN_20080412_130002 487.921 492.405
+BEIJING_TWOWAYLANES_CMN_20080412_130002 797.239 813.498
+BEIJING_TWOWAYLANES_CMN_20080412_130002 813.498 826.230
+BEIJING_TWOWAYLANES_CMN_20080412_130002 975.014 981.538
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1353.149 1358.056
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1407.301 1413.293
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1453.499 1458.273
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1477.649 1480.617
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1491.351 1493.288
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1566.656 1577.059
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1578.137 1581.246
+BEIJING_TWOWAYLANES_CMN_20080412_130002 1672.954 1676.313
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 198.267 222.928
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 300.901 318.742
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 512.973 522.999
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 635.639 644.839
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 684.635 699.428
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 684.635 699.428
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 899.453 920.398
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 953.360 969.367
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 953.360 969.367
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1311.152 1322.329
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1453.792 1469.543
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1469.543 1478.247
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1488.358 1518.195
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1518.195 1539.297
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 1539.297 1555.666
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 2162.997 2178.590
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 2785.563 2792.383
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 3189.655 3209.026
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 3314.451 3329.888
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 3872.413 3891.838
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 3905.932 3923.261
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 4000.534 4022.840
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 4117.817 4135.570
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 4433.878 4447.704
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 4691.977 4711.600
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 7486.080 7502.900
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8213.955 8225.734
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8407.659 8428.642
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8450.744 8479.864
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8712.892 8737.796
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8814.301 8835.930
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 8895.465 8913.788
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 9608.681 9625.125
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 10021.458 10033.824
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 10356.773 10381.454
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 10521.366 10536.851
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 10850.162 10872.187
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12390.477 12407.842
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12661.400 12673.940
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12661.400 12673.940
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12919.282 12931.671
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 12994.184 13017.972
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 13186.526 13198.159
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 13239.402 13260.472
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 13329.121 13344.916
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 13887.314 13902.082
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080312_122701 14018.529 14033.380
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 840.708 853.880
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 1920.531 1936.749
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 2408.951 2412.795
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 2503.551 2508.895
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080311_122701 2515.551 2522.164
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 193.534 200.284
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 764.696 770.243
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 1071.841 1082.060
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 1177.841 1183.888
+CCTV2_ECONOMYANDLAW_CMN_20080327_202821 1205.278 1209.309
+CCTV1_LEGALREPORT_CMN_20080321_123602 426.139 433.281
+CCTV2_ECONOMYANDLAW_CMN_20080401_202802 208.545 212.374
+CCTV2_ECONOMYANDLAW_CMN_20080401_202802 936.349 941.557
+CCTV2_ECONOMYANDLAW_CMN_20080401_202802 1367.649 1378.079
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 346.199 362.290
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 362.290 368.774
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 410.105 416.876
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 467.490 472.031
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 467.490 472.031
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 498.115 507.232
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 595.858 611.176
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 611.176 616.529
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 611.176 616.529
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 681.799 695.546
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 766.201 778.375
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 793.250 798.031
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 938.170 944.266
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 944.266 964.753
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 973.644 993.565
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1003.970 1037.143
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1204.394 1215.280
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1204.394 1215.280
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1249.087 1267.718
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1352.325 1371.420
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1505.167 1509.967
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1570.502 1580.729
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1613.045 1623.846
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 1942.809 1951.246
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2036.397 2050.135
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2614.535 2626.425
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2657.608 2674.175
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2808.029 2811.566
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2811.566 2816.597
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2840.395 2851.054
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2871.330 2889.000
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2896.157 2899.569
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 2958.738 2974.705
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3002.655 3015.083
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3081.621 3086.228
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3112.140 3120.602
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3112.140 3120.602
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3177.154 3186.591
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3252.260 3260.692
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3322.053 3331.395
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3351.192 3357.614
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3374.632 3379.866
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3502.375 3505.896
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3531.533 3546.092
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3549.691 3572.817
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3748.575 3754.234
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3853.778 3872.724
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 3901.051 3909.682
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4187.987 4190.300
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4190.300 4196.924
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4295.235 4302.294
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4529.393 4539.554
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4652.269 4669.863
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4788.794 4799.310
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4827.815 4843.164
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4851.293 4858.996
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4858.996 4880.929
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4893.102 4909.773
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 4893.102 4909.773
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5187.221 5196.533
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5376.896 5388.964
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5554.819 5572.218
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5572.218 5581.719
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5581.719 5588.858
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5591.061 5603.518
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5603.518 5615.292
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5615.292 5633.867
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5633.867 5637.258
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5637.258 5641.009
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5657.379 5666.880
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5666.880 5670.755
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5670.755 5679.520
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5686.114 5691.051
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5723.021 5738.273
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5738.273 5759.396
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5787.842 5794.232
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5799.576 5821.514
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5834.453 5846.734
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5863.462 5886.898
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5925.428 5941.566
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 5941.566 5959.503
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6178.666 6188.558
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6188.558 6195.766
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6229.538 6233.881
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6229.538 6233.881
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6245.265 6251.014
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6251.014 6258.689
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6433.140 6442.235
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6433.140 6442.235
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6470.937 6475.374
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6499.905 6524.048
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6557.085 6563.600
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6596.900 6605.796
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6657.418 6678.422
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 6994.200 6999.833
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7043.181 7057.846
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7062.810 7068.050
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7085.414 7095.427
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7106.832 7113.657
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7117.970 7124.079
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7134.908 7136.612
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7136.612 7137.816
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7137.816 7140.448
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7152.999 7162.414
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7162.414 7167.460
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7237.708 7243.710
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7329.513 7341.308
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7393.610 7407.984
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7437.349 7458.191
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7575.564 7583.218
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7692.318 7697.834
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 7889.134 7893.124
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8292.319 8297.244
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8292.319 8297.244
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8420.971 8422.803
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8508.250 8514.934
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8531.939 8544.436
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8575.956 8581.701
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8581.701 8584.663
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8618.482 8630.718
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8634.844 8641.156
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8641.156 8646.321
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8647.141 8651.125
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8647.141 8651.125
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8651.125 8656.237
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 8651.125 8656.237
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9233.874 9248.691
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9366.521 9375.377
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9375.377 9380.831
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9380.831 9389.770
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9444.440 9449.190
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 9479.117 9481.336
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10056.245 10064.059
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10158.726 10166.135
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10176.842 10181.873
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10176.842 10181.873
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10264.827 10286.662
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10311.306 10328.322
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080308_083701 10358.390 10375.175
+CCTV2_DIALOG_CMN_20080316_214834 1019.619 1025.932
+CCTV2_DIALOG_CMN_20080316_214834 1098.676 1106.317
+CCTV2_DIALOG_CMN_20080316_214834 1428.152 1438.024
+CCTV2_DIALOG_CMN_20080316_214834 1462.452 1470.692
+CCTV2_DIALOG_CMN_20080316_214834 1818.570 1826.008
+CCTV2_DIALOG_CMN_20080316_214834 2145.158 2151.986
+CCTV1_LEGALREPORT_CMN_20080311_123601 519.532 524.157
+CCTV1_LEGALREPORT_CMN_20080311_123601 912.474 927.498
+CCTV1_LEGALREPORT_CMN_20080311_123601 1086.885 1098.946
+CCTV2_ACROSSSTRAIT_CMN_20080312_073000 236.970 244.359
+VOA_STRAITSTALK_CMN_20080407_210500 78.197 84.964
+VOA_STRAITSTALK_CMN_20080407_210500 380.380 407.182
+VOA_STRAITSTALK_CMN_20080407_210500 737.740 740.843
+VOA_STRAITSTALK_CMN_20080407_210500 795.439 806.815
+VOA_STRAITSTALK_CMN_20080407_210500 948.524 964.064
+VOA_STRAITSTALK_CMN_20080407_210500 1501.159 1526.216
+VOA_STRAITSTALK_CMN_20080407_210500 1597.801 1611.951
+VOA_STRAITSTALK_CMN_20080407_210500 1727.467 1743.423
+VOA_STRAITSTALK_CMN_20080407_210500 1805.738 1806.520
+VOA_STRAITSTALK_CMN_20080407_210500 1944.355 1963.072
+VOA_STRAITSTALK_CMN_20080407_210500 1995.601 2003.988
+VOA_STRAITSTALK_CMN_20080407_210500 2051.287 2055.501
+VOA_STRAITSTALK_CMN_20080407_210500 2061.349 2081.484
+VOA_STRAITSTALK_CMN_20080407_210500 2139.432 2151.020
+VOA_STRAITSTALK_CMN_20080407_210500 2209.583 2223.521
+VOA_STRAITSTALK_CMN_20080407_210500 2366.359 2367.760
+VOA_STRAITSTALK_CMN_20080407_210500 2957.323 2965.293
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 476.351 484.400
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 635.787 643.616
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 635.787 643.616
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 672.116 684.215
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 780.285 781.942
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 882.632 886.312
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 2127.338 2131.336
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 2304.524 2316.837
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 2469.692 2483.067
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 2546.683 2562.826
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 3092.559 3117.522
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 3238.544 3265.067
+CCTVNEWS_PEOPLESCONGRESS2_CMN_20080317_144102 3756.116 3779.756
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 757.576 764.718
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 951.598 977.243
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 1262.048 1269.805
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 1297.397 1314.045
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 1560.342 1599.552
+CCTV4_ACROSSSTRAIT_CMN_20080330_073002 1560.342 1599.552
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 104.551 107.067
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 261.838 264.026
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 395.582 401.847
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 592.967 596.107
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 641.144 645.395
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 668.132 676.303
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 877.132 878.975
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 1223.588 1232.050
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 1347.089 1352.709
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 1890.190 1906.136
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 1890.190 1906.136
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 2055.559 2060.655
+CCTVNEWS_XIAOCUIINTERVIEW_CMN_20080309_122701 2275.375 2279.172
+CCTV4_ACROSSSTRAIT_CMN_20080322_073002 227.639 242.544
+CCTV4_ACROSSSTRAIT_CMN_20080322_073002 776.373 782.966
+CCTV4_ACROSSSTRAIT_CMN_20080322_073002 1015.042 1021.543
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 573.428 595.324
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 635.144 655.937
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 701.713 720.737
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 742.143 754.648
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 816.851 838.195
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 889.613 903.057
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 940.372 960.383
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1181.564 1196.749
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1196.749 1232.755
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1278.786 1300.476
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1325.810 1343.728
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1391.018 1404.904
+VOA_LISTENERSHOTLINE_CMN_20080418_223000 1577.436 1597.010
diff --git a/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25 b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25
new file mode 100644
index 00000000000..79cf4135556
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25
@@ -0,0 +1,40 @@
+CCTV1_30MINNEWS_CMN_20080328_115902
+CCTV1_30MINNEWS_CMN_20080329_115901
+CCTV1_30MINNEWS_CMN_20080331_115901
+CCTV1_30MINNEWS_CMN_20080401_115901
+CCTV1_30MINNEWS_CMN_20080407_115901
+CCTV1_30MINNEWS_CMN_20080412_115901
+CCTV2_ECON30MIN_CMN_20080406_213518
+CCTV2_ECON30MIN_CMN_20080410_213502
+CCTV2_ECON30MIN_CMN_20080411_213502
+CCTV2_ECON30MIN_CMN_20080412_213501
+CCTV2_ECON30MIN_CMN_20080413_213502
+CCTV2_ECON30MIN_CMN_20080420_213502
+CCTV2_ECON30MIN_CMN_20080423_213501
+CCTV2_ECON30MIN_CMN_20080425_213502
+CCTV2_ECON30MIN_CMN_20080426_213501
+CCTV2_ECON30MIN_CMN_20080429_213502
+CCTV2_NEWSLIST_CMN_20080407_114902
+CCTV2_NEWSLIST_CMN_20080415_114902
+CCTV2_NEWSLIST_CMN_20080416_114902
+CCTV7_MILITARYNEWS1_CMN_20080325_100502
+CCTV7_MILITARYNEWS1_CMN_20080327_100812
+CCTV7_MILITARYNEWS1_CMN_20080330_100520
+CCTV7_MILITARYNEWS1_CMN_20080407_100502
+CCTV7_MILITARYNEWS1_CMN_20080416_100502
+CCTV7_MILITARYNEWS1_CMN_20080420_100515
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000
+VOA_INTNLNEWS_CMN_20080402_210000
+VOA_INTNLNEWS_CMN_20080405_210000
+VOA_INTNLNEWS_CMN_20080407_210000
+VOA_INTNLNEWS_CMN_20080410_210000
+VOA_INTNLNEWS_CMN_20080412_210000
+VOA_INTNLNEWS_CMN_20080414_210000
diff --git a/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25.segment b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25.segment
new file mode 100644
index 00000000000..0fc4387b9c1
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gale_eval/test.LDC2017S25.segment
@@ -0,0 +1,4026 @@
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 21.463 32.355
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 32.355 39.415
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 45.494 54.198
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 54.198 57.120
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 57.120 58.964
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 58.964 70.684
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 70.684 81.638
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 81.638 101.782
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 138.357 146.560
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 146.560 156.884
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 156.884 164.757
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 171.188 182.903
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 182.903 198.838
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 198.838 205.748
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 205.748 232.942
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 232.942 246.753
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 246.753 259.112
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 259.112 264.815
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 304.346 317.839
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 317.839 339.164
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 357.075 367.565
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 367.565 380.972
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 392.074 401.948
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 401.948 412.906
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 433.324 445.133
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 455.727 476.728
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 496.285 504.535
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 538.718 548.786
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 548.786 556.808
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 556.808 565.542
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 565.542 575.950
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 575.950 584.168
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 584.168 589.641
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 589.641 598.497
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 598.497 624.088
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 624.088 631.864
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 631.864 666.366
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 666.366 678.567
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 714.149 732.313
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 732.313 745.214
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 745.214 750.238
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 770.222 786.636
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 818.556 841.036
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 841.036 863.175
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 863.175 865.624
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 934.986 946.288
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 946.288 948.905
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 967.731 976.654
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1012.924 1033.778
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1053.328 1055.388
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1082.532 1092.085
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1105.869 1114.512
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1114.512 1119.941
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1143.248 1161.570
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1161.570 1174.396
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1174.396 1180.159
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1198.649 1205.819
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1212.239 1225.726
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1225.726 1238.265
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1246.695 1259.250
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1270.621 1279.500
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1290.220 1302.389
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1302.389 1306.289
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1374.030 1386.299
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1386.299 1389.580
+CCTV7_MILITARYNEWS1_CMN_20080330_100520 1389.580 1390.455
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 67.359 70.217
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 70.217 72.249
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 72.249 77.811
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 77.811 85.933
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 85.933 94.636
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 94.636 98.823
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 103.583 105.981
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 124.906 134.814
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 134.814 141.503
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 141.503 156.314
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 156.314 165.092
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 165.092 170.883
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 173.616 176.600
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 176.600 181.951
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 181.951 190.809
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 190.809 199.713
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 199.713 205.354
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 205.354 212.139
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 212.139 219.841
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 219.841 227.718
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 227.718 234.781
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 238.517 252.115
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 252.115 261.844
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 274.926 287.084
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 287.084 310.010
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 310.010 326.862
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 326.862 335.741
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 335.741 343.800
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 343.800 348.613
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 348.613 362.942
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 362.942 368.480
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 368.480 379.481
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 379.481 388.292
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 388.292 393.665
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 393.665 400.261
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 400.261 407.416
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 416.077 421.255
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 421.255 431.592
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 431.592 437.936
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 437.936 449.077
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 449.077 457.855
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 457.855 470.923
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 479.159 484.972
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 484.972 490.426
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 498.469 510.872
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 510.872 519.337
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 528.538 540.782
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 540.782 549.100
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 549.100 558.216
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 558.216 570.439
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 570.439 574.470
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 574.470 578.861
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 578.861 582.711
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 582.711 593.428
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 602.694 612.956
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 612.956 617.537
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 617.537 625.309
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 625.309 630.855
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 630.855 641.811
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 660.532 667.639
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 667.639 679.810
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 679.810 685.016
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 685.016 687.610
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 687.610 693.415
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 702.979 710.241
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 723.754 729.739
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 729.739 733.523
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 733.523 737.475
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 737.475 743.742
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 751.406 763.986
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 763.986 775.358
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 775.358 784.393
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 784.393 798.534
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 798.534 809.328
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 809.328 820.462
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 820.462 829.017
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 842.141 850.582
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 861.677 867.582
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 867.582 888.830
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 888.830 896.232
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 900.075 909.620
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 909.620 920.433
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 920.433 934.204
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 934.204 950.207
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 950.207 961.708
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 961.708 970.456
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 970.456 977.025
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 977.025 986.414
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 986.414 993.227
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 993.227 998.462
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 998.462 1016.931
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1016.931 1019.422
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1019.422 1026.000
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1026.000 1034.860
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1034.860 1041.111
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1041.111 1047.782
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1047.782 1058.523
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1058.523 1064.417
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1064.417 1073.178
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1073.178 1082.940
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1082.940 1089.033
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1089.033 1097.488
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1097.488 1100.904
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1113.768 1119.747
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1119.747 1122.895
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1122.895 1131.545
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1131.545 1136.447
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1142.001 1155.917
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1155.917 1174.762
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1174.762 1187.288
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1187.288 1197.992
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1206.264 1208.347
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1208.347 1212.540
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1212.540 1218.175
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1218.175 1231.904
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1251.072 1258.989
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1258.989 1264.612
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1264.612 1271.258
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1277.118 1290.228
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1298.668 1304.963
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1304.963 1319.471
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1319.471 1336.043
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1336.043 1343.185
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1343.185 1351.127
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1351.127 1357.886
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1357.886 1367.701
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1375.738 1383.423
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1383.423 1395.263
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1395.263 1397.972
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1397.972 1399.414
+CCTV7_MILITARYNEWS1_CMN_20080325_100502 1399.414 1400.936
+CCTV2_ECON30MIN_CMN_20080420_213502 234.355 237.652
+CCTV2_ECON30MIN_CMN_20080420_213502 253.229 262.635
+CCTV2_ECON30MIN_CMN_20080420_213502 295.066 302.269
+CCTV2_ECON30MIN_CMN_20080420_213502 302.269 312.255
+CCTV2_ECON30MIN_CMN_20080420_213502 312.255 315.359
+CCTV2_ECON30MIN_CMN_20080420_213502 315.359 318.891
+CCTV2_ECON30MIN_CMN_20080420_213502 318.891 327.688
+CCTV2_ECON30MIN_CMN_20080420_213502 327.688 333.687
+CCTV2_ECON30MIN_CMN_20080420_213502 333.687 345.022
+CCTV2_ECON30MIN_CMN_20080420_213502 345.022 351.772
+CCTV2_ECON30MIN_CMN_20080420_213502 351.772 357.444
+CCTV2_ECON30MIN_CMN_20080420_213502 357.444 363.975
+CCTV2_ECON30MIN_CMN_20080420_213502 363.975 374.241
+CCTV2_ECON30MIN_CMN_20080420_213502 374.241 380.538
+CCTV2_ECON30MIN_CMN_20080420_213502 380.538 390.917
+CCTV2_ECON30MIN_CMN_20080420_213502 390.917 399.729
+CCTV2_ECON30MIN_CMN_20080420_213502 399.729 413.464
+CCTV2_ECON30MIN_CMN_20080420_213502 440.591 443.950
+CCTV2_ECON30MIN_CMN_20080420_213502 443.950 450.341
+CCTV2_ECON30MIN_CMN_20080420_213502 450.341 460.060
+CCTV2_ECON30MIN_CMN_20080420_213502 466.707 473.692
+CCTV2_ECON30MIN_CMN_20080420_213502 481.238 490.567
+CCTV2_ECON30MIN_CMN_20080420_213502 490.567 499.833
+CCTV2_ECON30MIN_CMN_20080420_213502 499.833 509.130
+CCTV2_ECON30MIN_CMN_20080420_213502 509.130 512.140
+CCTV2_ECON30MIN_CMN_20080420_213502 512.140 513.203
+CCTV2_ECON30MIN_CMN_20080420_213502 513.203 525.047
+CCTV2_ECON30MIN_CMN_20080420_213502 525.047 534.235
+CCTV2_ECON30MIN_CMN_20080420_213502 534.235 545.579
+CCTV2_ECON30MIN_CMN_20080420_213502 545.579 553.836
+CCTV2_ECON30MIN_CMN_20080420_213502 678.405 679.968
+CCTV2_ECON30MIN_CMN_20080420_213502 679.968 685.609
+CCTV2_ECON30MIN_CMN_20080420_213502 693.842 699.920
+CCTV2_ECON30MIN_CMN_20080420_213502 706.217 713.801
+CCTV2_ECON30MIN_CMN_20080420_213502 714.879 716.160
+CCTV2_ECON30MIN_CMN_20080420_213502 716.160 716.738
+CCTV2_ECON30MIN_CMN_20080420_213502 718.644 719.535
+CCTV2_ECON30MIN_CMN_20080420_213502 738.755 741.364
+CCTV2_ECON30MIN_CMN_20080420_213502 741.364 743.239
+CCTV2_ECON30MIN_CMN_20080420_213502 743.239 751.224
+CCTV2_ECON30MIN_CMN_20080420_213502 760.460 765.007
+CCTV2_ECON30MIN_CMN_20080420_213502 765.007 770.960
+CCTV2_ECON30MIN_CMN_20080420_213502 784.289 794.633
+CCTV2_ECON30MIN_CMN_20080420_213502 794.633 798.930
+CCTV2_ECON30MIN_CMN_20080420_213502 798.930 806.088
+CCTV2_ECON30MIN_CMN_20080420_213502 806.088 808.682
+CCTV2_ECON30MIN_CMN_20080420_213502 808.682 815.813
+CCTV2_ECON30MIN_CMN_20080420_213502 815.813 820.000
+CCTV2_ECON30MIN_CMN_20080420_213502 820.000 828.171
+CCTV2_ECON30MIN_CMN_20080420_213502 828.171 834.726
+CCTV2_ECON30MIN_CMN_20080420_213502 834.726 837.633
+CCTV2_ECON30MIN_CMN_20080420_213502 837.633 842.242
+CCTV2_ECON30MIN_CMN_20080420_213502 851.836 869.664
+CCTV2_ECON30MIN_CMN_20080420_213502 875.867 888.602
+CCTV2_ECON30MIN_CMN_20080420_213502 888.602 895.439
+CCTV2_ECON30MIN_CMN_20080420_213502 895.439 900.517
+CCTV2_ECON30MIN_CMN_20080420_213502 900.517 914.238
+CCTV2_ECON30MIN_CMN_20080420_213502 914.238 918.551
+CCTV2_ECON30MIN_CMN_20080420_213502 914.238 918.551
+CCTV2_ECON30MIN_CMN_20080420_213502 924.645 930.004
+CCTV2_ECON30MIN_CMN_20080420_213502 951.003 970.987
+CCTV2_ECON30MIN_CMN_20080420_213502 1003.269 1013.660
+CCTV2_ECON30MIN_CMN_20080420_213502 1020.583 1024.286
+CCTV2_ECON30MIN_CMN_20080420_213502 1020.583 1024.286
+CCTV2_ECON30MIN_CMN_20080420_213502 1024.286 1039.083
+CCTV2_ECON30MIN_CMN_20080420_213502 1039.083 1045.585
+CCTV2_ECON30MIN_CMN_20080420_213502 1039.083 1045.585
+CCTV2_ECON30MIN_CMN_20080420_213502 1049.677 1052.021
+CCTV2_ECON30MIN_CMN_20080420_213502 1052.021 1052.505
+CCTV2_ECON30MIN_CMN_20080420_213502 1052.505 1053.037
+CCTV2_ECON30MIN_CMN_20080420_213502 1053.037 1058.031
+CCTV2_ECON30MIN_CMN_20080420_213502 1067.374 1072.187
+CCTV2_ECON30MIN_CMN_20080420_213502 1072.187 1074.999
+CCTV2_ECON30MIN_CMN_20080420_213502 1074.999 1075.670
+CCTV2_ECON30MIN_CMN_20080420_213502 1075.670 1079.870
+CCTV2_ECON30MIN_CMN_20080420_213502 1075.670 1079.870
+CCTV2_ECON30MIN_CMN_20080420_213502 1079.870 1082.073
+CCTV2_ECON30MIN_CMN_20080420_213502 1082.073 1085.730
+CCTV2_ECON30MIN_CMN_20080420_213502 1082.073 1085.730
+CCTV2_ECON30MIN_CMN_20080420_213502 1088.996 1094.089
+CCTV2_ECON30MIN_CMN_20080420_213502 1094.089 1102.260
+CCTV2_ECON30MIN_CMN_20080420_213502 1102.260 1112.476
+CCTV2_ECON30MIN_CMN_20080420_213502 1123.991 1127.070
+CCTV2_ECON30MIN_CMN_20080420_213502 1141.019 1152.161
+CCTV2_ECON30MIN_CMN_20080420_213502 1178.428 1191.506
+CCTV2_ECON30MIN_CMN_20080420_213502 1283.084 1284.287
+CCTV2_ECON30MIN_CMN_20080420_213502 1301.895 1308.302
+CCTV2_ECON30MIN_CMN_20080420_213502 1308.302 1313.927
+CCTV2_ECON30MIN_CMN_20080420_213502 1313.927 1319.068
+CCTV2_ECON30MIN_CMN_20080420_213502 1319.068 1330.584
+CCTV2_ECON30MIN_CMN_20080420_213502 1330.584 1338.693
+CCTV2_ECON30MIN_CMN_20080420_213502 1338.693 1346.021
+CCTV2_ECON30MIN_CMN_20080420_213502 1346.021 1356.834
+CCTV2_ECON30MIN_CMN_20080420_213502 1371.297 1375.642
+CCTV2_ECON30MIN_CMN_20080420_213502 1375.642 1380.486
+CCTV2_ECON30MIN_CMN_20080420_213502 1404.392 1412.094
+CCTV2_ECON30MIN_CMN_20080420_213502 1412.094 1423.359
+CCTV2_ECON30MIN_CMN_20080420_213502 1423.359 1432.343
+CCTV2_ECON30MIN_CMN_20080420_213502 1439.640 1460.031
+CCTV2_ECON30MIN_CMN_20080420_213502 1460.031 1468.906
+CCTV2_ECON30MIN_CMN_20080420_213502 1483.329 1491.032
+CCTV2_ECON30MIN_CMN_20080420_213502 1501.140 1518.047
+CCTV2_ECON30MIN_CMN_20080420_213502 1518.047 1529.531
+CCTV2_ECON30MIN_CMN_20080420_213502 1529.531 1555.125
+CCTV2_ECON30MIN_CMN_20080420_213502 1555.125 1559.343
+CCTV2_ECON30MIN_CMN_20080420_213502 1559.343 1562.843
+CCTV2_ECON30MIN_CMN_20080420_213502 1590.906 1603.857
+CCTV2_ECON30MIN_CMN_20080420_213502 1613.122 1620.508
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 29.638 41.174
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 61.174 74.985
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 74.985 80.069
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 112.640 135.703
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 135.703 150.481
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 150.481 161.646
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 161.646 174.035
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 174.035 185.040
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 185.040 193.175
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 193.175 208.438
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 222.324 234.931
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 234.931 261.970
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 261.970 269.748
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 269.748 295.797
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 309.989 316.211
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 362.786 370.488
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 370.488 386.259
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 386.259 405.897
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 429.675 455.188
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 455.188 464.853
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 489.754 515.497
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 545.652 560.815
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 560.815 570.212
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 570.212 580.775
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 580.775 598.261
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 598.261 608.680
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 656.132 664.402
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 664.402 674.431
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 695.381 704.483
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 723.729 736.418
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 759.826 777.547
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 777.547 795.385
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 843.623 853.577
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 874.685 886.505
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 886.505 896.745
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 896.745 910.092
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 910.092 925.252
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 925.252 938.143
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 938.143 951.745
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 951.745 964.943
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1019.141 1032.339
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1032.339 1034.788
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1034.788 1054.070
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1054.070 1080.835
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1141.402 1158.671
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1158.671 1173.639
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1195.100 1198.573
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1236.256 1249.507
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1260.542 1271.225
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1304.935 1307.384
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1379.623 1385.280
+CCTV7_MILITARYNEWS1_CMN_20080416_100502 1379.623 1385.280
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 24.820 31.664
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 31.664 41.447
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 41.447 49.400
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 49.400 55.977
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 59.165 61.150
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 61.150 69.807
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 69.807 74.543
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 74.543 81.496
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 81.496 91.651
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 91.651 100.649
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 106.211 109.954
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 109.954 128.138
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 128.138 138.529
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 138.529 144.545
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 144.545 154.059
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 164.200 173.216
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 173.216 178.609
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 178.609 188.421
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 203.187 210.031
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 215.388 219.389
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 219.389 225.622
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 231.748 239.497
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 239.497 244.855
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 244.855 259.219
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 286.970 292.313
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 292.313 304.779
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 304.779 316.361
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 334.019 341.924
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 346.205 351.517
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 351.517 362.363
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 362.363 375.754
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 375.754 393.754
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 393.754 401.285
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 401.285 407.723
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 407.723 418.677
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 425.287 432.099
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 432.099 439.629
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 448.833 456.051
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 468.770 477.646
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 477.646 484.442
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 484.442 496.784
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 512.971 518.628
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 518.628 533.084
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 533.084 541.646
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 541.646 554.100
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 567.085 571.460
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 571.460 577.288
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 577.288 584.523
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 584.523 598.348
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 598.348 607.426
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 616.401 627.699
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 627.699 639.104
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 639.104 644.307
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 653.398 658.383
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 658.383 669.929
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 669.929 680.272
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 691.523 699.460
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 710.741 725.602
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 725.602 743.573
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 743.573 752.479
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 752.479 759.448
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 759.448 763.341
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 763.341 777.146
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 777.146 786.369
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 786.369 793.150
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 793.150 802.042
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 802.042 810.840
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 810.840 828.825
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 828.825 844.043
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 844.043 857.419
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 857.419 869.544
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 869.544 879.935
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 891.044 896.794
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 896.794 901.638
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 918.416 924.494
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 924.494 931.103
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 931.103 947.803
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 960.943 965.792
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 965.792 970.370
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 970.370 977.778
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 977.778 986.621
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 986.621 993.606
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 993.606 995.294
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 995.294 1004.564
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1004.564 1012.970
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1044.924 1056.846
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1056.846 1065.345
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1065.345 1077.720
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1077.720 1084.360
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1084.360 1097.843
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1097.843 1100.546
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1100.546 1111.858
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1123.731 1127.075
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1127.075 1130.165
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1130.165 1139.275
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1139.275 1146.377
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1146.377 1153.737
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1153.737 1166.565
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1166.565 1174.330
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1174.330 1183.486
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1183.486 1194.515
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1194.515 1206.375
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1221.953 1234.161
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1234.161 1245.160
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1245.160 1247.482
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1247.482 1252.655
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1252.655 1258.607
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1258.607 1270.308
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1270.308 1273.192
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1273.192 1275.348
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1275.348 1281.520
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1281.520 1287.723
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1287.723 1293.458
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1293.458 1301.148
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1301.148 1304.054
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1309.820 1313.960
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1313.960 1319.772
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1319.772 1325.506
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1336.008 1342.774
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1352.726 1355.571
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1355.571 1357.634
+CCTV7_MILITARYNEWS1_CMN_20080420_100515 1357.634 1358.399
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 229.698 233.901
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 233.901 236.698
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 236.698 241.120
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 241.120 253.027
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 259.840 272.480
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 272.480 279.824
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 279.824 290.626
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 290.626 293.516
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 293.516 298.189
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 298.189 303.830
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 303.830 309.580
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 309.580 324.471
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 324.471 333.659
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 333.659 338.299
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 338.299 344.471
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 352.721 360.597
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 360.597 367.222
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 367.222 387.378
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 387.378 394.925
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 394.925 408.081
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 408.081 417.862
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 417.862 428.112
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 428.112 435.549
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 446.502 460.080
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 460.080 467.664
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 467.664 473.774
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 482.742 490.898
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 490.898 501.641
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 501.641 507.602
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 521.617 529.083
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 529.083 532.910
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 569.628 576.175
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 576.175 578.535
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 587.816 592.222
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 592.222 597.658
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 597.658 609.347
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 609.347 612.191
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 612.191 622.551
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 622.551 638.989
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 638.989 655.614
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 655.614 663.083
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 663.083 679.717
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 679.717 687.311
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 687.311 699.060
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 699.060 711.888
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 711.888 721.310
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 721.310 731.825
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 731.825 735.887
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 735.887 747.120
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 747.120 757.760
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 767.323 770.141
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 770.141 772.751
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 772.751 779.206
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 779.206 791.472
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 791.472 795.878
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 795.878 804.477
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 809.214 816.348
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 816.348 826.409
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 826.409 834.424
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 834.424 858.586
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 864.811 868.545
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 868.545 876.872
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 876.872 887.715
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 887.715 890.324
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 890.324 897.903
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 915.543 922.574
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 928.370 939.182
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 939.182 948.620
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 948.620 952.651
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 952.651 959.635
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 959.635 972.447
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 972.447 979.823
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 979.823 992.214
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 999.647 1004.788
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1004.788 1011.929
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1011.929 1021.773
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1021.773 1030.931
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1030.931 1037.574
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1037.574 1042.027
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1042.027 1049.402
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1049.402 1065.739
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1073.693 1079.005
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1079.005 1083.989
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1083.989 1094.707
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1099.969 1102.843
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1102.843 1117.965
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1117.965 1120.372
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1120.372 1124.121
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1124.121 1134.073
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1141.483 1149.718
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1149.718 1151.546
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1157.608 1166.676
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1166.676 1172.729
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1172.729 1181.213
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1181.213 1190.837
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1190.837 1196.618
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1196.618 1206.540
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1206.540 1225.071
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1237.382 1246.320
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1246.320 1258.509
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1258.509 1272.603
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1272.603 1278.286
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1295.029 1298.826
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1298.826 1305.873
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1326.209 1332.709
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1332.709 1344.586
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1344.586 1364.199
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1364.199 1374.852
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1374.852 1382.713
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1382.713 1388.166
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1388.166 1399.198
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1399.198 1405.495
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1419.548 1424.094
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1424.094 1430.015
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1437.439 1456.752
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1456.752 1469.403
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1469.403 1474.446
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1485.195 1488.335
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1488.335 1490.304
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1490.304 1502.615
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1502.615 1506.209
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1524.692 1530.378
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1591.894 1600.097
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1603.821 1621.586
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1621.586 1636.617
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1636.617 1644.055
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1644.055 1649.180
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1656.711 1672.165
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1672.165 1683.040
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1683.040 1693.837
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1693.837 1700.096
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1722.760 1736.510
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1747.322 1767.208
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1770.834 1783.287
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1783.287 1791.913
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1806.014 1809.387
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1818.301 1820.847
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1820.847 1827.761
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1827.761 1831.870
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1835.588 1845.681
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1845.681 1858.778
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1864.946 1872.040
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1872.040 1883.883
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1883.883 1891.540
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1914.196 1921.415
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1921.415 1925.306
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1949.601 1957.945
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1957.945 1965.928
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1965.928 1978.022
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1978.022 1983.444
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1983.444 1988.616
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 1988.616 1998.663
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 2005.082 2011.787
+VOA_INTNLNEWSFINANCE_CMN_20080410_100000 2011.787 2014.521
+CCTV2_ECON30MIN_CMN_20080429_213502 19.671 28.109
+CCTV2_ECON30MIN_CMN_20080429_213502 32.374 39.546
+CCTV2_ECON30MIN_CMN_20080429_213502 43.156 46.593
+CCTV2_ECON30MIN_CMN_20080429_213502 214.123 217.170
+CCTV2_ECON30MIN_CMN_20080429_213502 247.155 255.302
+CCTV2_ECON30MIN_CMN_20080429_213502 255.302 257.568
+CCTV2_ECON30MIN_CMN_20080429_213502 257.568 270.660
+CCTV2_ECON30MIN_CMN_20080429_213502 278.498 288.025
+CCTV2_ECON30MIN_CMN_20080429_213502 294.276 300.214
+CCTV2_ECON30MIN_CMN_20080429_213502 300.214 305.404
+CCTV2_ECON30MIN_CMN_20080429_213502 305.404 312.544
+CCTV2_ECON30MIN_CMN_20080429_213502 312.544 316.873
+CCTV2_ECON30MIN_CMN_20080429_213502 316.873 323.029
+CCTV2_ECON30MIN_CMN_20080429_213502 329.513 334.809
+CCTV2_ECON30MIN_CMN_20080429_213502 334.809 342.387
+CCTV2_ECON30MIN_CMN_20080429_213502 342.387 348.278
+CCTV2_ECON30MIN_CMN_20080429_213502 368.839 377.885
+CCTV2_ECON30MIN_CMN_20080429_213502 394.995 400.071
+CCTV2_ECON30MIN_CMN_20080429_213502 400.071 407.493
+CCTV2_ECON30MIN_CMN_20080429_213502 407.493 409.946
+CCTV2_ECON30MIN_CMN_20080429_213502 409.946 424.618
+CCTV2_ECON30MIN_CMN_20080429_213502 424.618 431.478
+CCTV2_ECON30MIN_CMN_20080429_213502 431.478 435.525
+CCTV2_ECON30MIN_CMN_20080429_213502 435.525 436.353
+CCTV2_ECON30MIN_CMN_20080429_213502 436.353 438.994
+CCTV2_ECON30MIN_CMN_20080429_213502 438.994 442.838
+CCTV2_ECON30MIN_CMN_20080429_213502 442.838 452.707
+CCTV2_ECON30MIN_CMN_20080429_213502 452.707 458.924
+CCTV2_ECON30MIN_CMN_20080429_213502 474.127 479.940
+CCTV2_ECON30MIN_CMN_20080429_213502 484.533 491.939
+CCTV2_ECON30MIN_CMN_20080429_213502 491.939 502.158
+CCTV2_ECON30MIN_CMN_20080429_213502 502.158 510.080
+CCTV2_ECON30MIN_CMN_20080429_213502 510.080 515.580
+CCTV2_ECON30MIN_CMN_20080429_213502 515.580 522.471
+CCTV2_ECON30MIN_CMN_20080429_213502 522.471 532.784
+CCTV2_ECON30MIN_CMN_20080429_213502 532.784 550.785
+CCTV2_ECON30MIN_CMN_20080429_213502 558.926 561.187
+CCTV2_ECON30MIN_CMN_20080429_213502 568.074 569.731
+CCTV2_ECON30MIN_CMN_20080429_213502 719.770 721.099
+CCTV2_ECON30MIN_CMN_20080429_213502 730.082 744.393
+CCTV2_ECON30MIN_CMN_20080429_213502 751.236 767.189
+CCTV2_ECON30MIN_CMN_20080429_213502 767.189 769.470
+CCTV2_ECON30MIN_CMN_20080429_213502 769.470 770.048
+CCTV2_ECON30MIN_CMN_20080429_213502 770.048 771.798
+CCTV2_ECON30MIN_CMN_20080429_213502 772.360 773.766
+CCTV2_ECON30MIN_CMN_20080429_213502 773.766 775.781
+CCTV2_ECON30MIN_CMN_20080429_213502 773.766 775.781
+CCTV2_ECON30MIN_CMN_20080429_213502 775.781 776.844
+CCTV2_ECON30MIN_CMN_20080429_213502 775.781 776.844
+CCTV2_ECON30MIN_CMN_20080429_213502 787.500 792.624
+CCTV2_ECON30MIN_CMN_20080429_213502 792.624 796.859
+CCTV2_ECON30MIN_CMN_20080429_213502 803.247 808.903
+CCTV2_ECON30MIN_CMN_20080429_213502 830.185 833.748
+CCTV2_ECON30MIN_CMN_20080429_213502 833.748 841.405
+CCTV2_ECON30MIN_CMN_20080429_213502 841.405 847.702
+CCTV2_ECON30MIN_CMN_20080429_213502 847.702 852.170
+CCTV2_ECON30MIN_CMN_20080429_213502 852.170 861.421
+CCTV2_ECON30MIN_CMN_20080429_213502 861.421 873.507
+CCTV2_ECON30MIN_CMN_20080429_213502 880.819 888.139
+CCTV2_ECON30MIN_CMN_20080429_213502 888.139 895.515
+CCTV2_ECON30MIN_CMN_20080429_213502 895.515 902.406
+CCTV2_ECON30MIN_CMN_20080429_213502 919.296 925.437
+CCTV2_ECON30MIN_CMN_20080429_213502 925.437 932.267
+CCTV2_ECON30MIN_CMN_20080429_213502 932.267 939.831
+CCTV2_ECON30MIN_CMN_20080429_213502 939.831 950.347
+CCTV2_ECON30MIN_CMN_20080429_213502 950.347 962.268
+CCTV2_ECON30MIN_CMN_20080429_213502 962.268 972.596
+CCTV2_ECON30MIN_CMN_20080429_213502 984.611 988.248
+CCTV2_ECON30MIN_CMN_20080429_213502 988.248 1008.414
+CCTV2_ECON30MIN_CMN_20080429_213502 1036.446 1042.134
+CCTV2_ECON30MIN_CMN_20080429_213502 1042.134 1049.055
+CCTV2_ECON30MIN_CMN_20080429_213502 1049.055 1064.962
+CCTV2_ECON30MIN_CMN_20080429_213502 1064.962 1074.275
+CCTV2_ECON30MIN_CMN_20080429_213502 1074.275 1081.603
+CCTV2_ECON30MIN_CMN_20080429_213502 1081.603 1084.759
+CCTV2_ECON30MIN_CMN_20080429_213502 1084.759 1092.087
+CCTV2_ECON30MIN_CMN_20080429_213502 1092.087 1098.899
+CCTV2_ECON30MIN_CMN_20080429_213502 1098.899 1099.524
+CCTV2_ECON30MIN_CMN_20080429_213502 1099.524 1103.743
+CCTV2_ECON30MIN_CMN_20080429_213502 1109.039 1112.492
+CCTV2_ECON30MIN_CMN_20080429_213502 1139.524 1155.108
+CCTV2_ECON30MIN_CMN_20080429_213502 1155.108 1161.890
+CCTV2_ECON30MIN_CMN_20080429_213502 1178.277 1183.918
+CCTV2_ECON30MIN_CMN_20080429_213502 1183.918 1189.671
+CCTV2_ECON30MIN_CMN_20080429_213502 1183.918 1189.671
+CCTV2_ECON30MIN_CMN_20080429_213502 1189.671 1202.827
+CCTV2_ECON30MIN_CMN_20080429_213502 1202.827 1206.671
+CCTV2_ECON30MIN_CMN_20080429_213502 1225.414 1233.134
+CCTV2_ECON30MIN_CMN_20080429_213502 1243.461 1267.804
+CCTV2_ECON30MIN_CMN_20080429_213502 1267.804 1270.663
+CCTV2_ECON30MIN_CMN_20080429_213502 1427.799 1429.627
+CCTV2_ECON30MIN_CMN_20080429_213502 1429.627 1435.033
+CCTV2_ECON30MIN_CMN_20080429_213502 1435.033 1456.200
+CCTV2_ECON30MIN_CMN_20080429_213502 1456.200 1467.698
+CCTV2_ECON30MIN_CMN_20080429_213502 1475.542 1486.964
+CCTV2_ECON30MIN_CMN_20080429_213502 1486.964 1493.449
+CCTV2_ECON30MIN_CMN_20080429_213502 1504.028 1519.482
+CCTV2_ECON30MIN_CMN_20080429_213502 1519.482 1532.405
+CCTV2_ECON30MIN_CMN_20080429_213502 1532.405 1534.343
+CCTV2_ECON30MIN_CMN_20080429_213502 1553.046 1560.554
+CCTV2_ECON30MIN_CMN_20080429_213502 1600.757 1610.648
+CCTV2_ECON30MIN_CMN_20080429_213502 1610.648 1615.944
+CCTV2_ECON30MIN_CMN_20080429_213502 1615.944 1620.664
+CCTV1_30MINNEWS_CMN_20080331_115901 73.617 74.726
+CCTV1_30MINNEWS_CMN_20080331_115901 80.039 81.273
+CCTV1_30MINNEWS_CMN_20080331_115901 81.273 83.273
+CCTV1_30MINNEWS_CMN_20080331_115901 83.273 91.837
+CCTV1_30MINNEWS_CMN_20080331_115901 91.837 104.369
+CCTV1_30MINNEWS_CMN_20080331_115901 104.369 115.744
+CCTV1_30MINNEWS_CMN_20080331_115901 119.213 129.463
+CCTV1_30MINNEWS_CMN_20080331_115901 160.767 172.268
+CCTV1_30MINNEWS_CMN_20080331_115901 172.268 180.705
+CCTV1_30MINNEWS_CMN_20080331_115901 241.745 256.432
+CCTV1_30MINNEWS_CMN_20080331_115901 263.026 281.104
+CCTV1_30MINNEWS_CMN_20080331_115901 317.540 326.435
+CCTV1_30MINNEWS_CMN_20080331_115901 326.435 330.040
+CCTV1_30MINNEWS_CMN_20080331_115901 330.040 342.509
+CCTV1_30MINNEWS_CMN_20080331_115901 358.868 368.258
+CCTV1_30MINNEWS_CMN_20080331_115901 368.258 374.914
+CCTV1_30MINNEWS_CMN_20080331_115901 374.914 379.211
+CCTV1_30MINNEWS_CMN_20080331_115901 379.211 385.023
+CCTV1_30MINNEWS_CMN_20080331_115901 385.023 393.261
+CCTV1_30MINNEWS_CMN_20080331_115901 393.261 399.898
+CCTV1_30MINNEWS_CMN_20080331_115901 399.898 407.117
+CCTV1_30MINNEWS_CMN_20080331_115901 407.117 411.743
+CCTV1_30MINNEWS_CMN_20080331_115901 411.743 418.165
+CCTV1_30MINNEWS_CMN_20080331_115901 431.499 437.077
+CCTV1_30MINNEWS_CMN_20080331_115901 459.808 464.745
+CCTV1_30MINNEWS_CMN_20080331_115901 489.702 498.905
+CCTV1_30MINNEWS_CMN_20080331_115901 505.249 523.030
+CCTV1_30MINNEWS_CMN_20080331_115901 523.030 525.812
+CCTV1_30MINNEWS_CMN_20080331_115901 525.812 529.171
+CCTV1_30MINNEWS_CMN_20080331_115901 529.171 536.467
+CCTV1_30MINNEWS_CMN_20080331_115901 544.186 547.390
+CCTV1_30MINNEWS_CMN_20080331_115901 547.390 552.718
+CCTV1_30MINNEWS_CMN_20080331_115901 559.780 562.702
+CCTV1_30MINNEWS_CMN_20080331_115901 562.702 571.076
+CCTV1_30MINNEWS_CMN_20080331_115901 579.545 581.358
+CCTV1_30MINNEWS_CMN_20080331_115901 595.700 602.029
+CCTV1_30MINNEWS_CMN_20080331_115901 602.029 622.670
+CCTV1_30MINNEWS_CMN_20080331_115901 622.670 626.592
+CCTV1_30MINNEWS_CMN_20080331_115901 626.592 642.310
+CCTV1_30MINNEWS_CMN_20080331_115901 642.310 647.277
+CCTV1_30MINNEWS_CMN_20080331_115901 657.824 672.325
+CCTV1_30MINNEWS_CMN_20080331_115901 672.325 683.684
+CCTV1_30MINNEWS_CMN_20080331_115901 691.637 704.684
+CCTV1_30MINNEWS_CMN_20080331_115901 704.684 716.388
+CCTV1_30MINNEWS_CMN_20080331_115901 727.200 734.200
+CCTV1_30MINNEWS_CMN_20080331_115901 734.200 739.404
+CCTV1_30MINNEWS_CMN_20080331_115901 739.404 753.747
+CCTV1_30MINNEWS_CMN_20080331_115901 753.747 769.246
+CCTV1_30MINNEWS_CMN_20080331_115901 769.246 776.356
+CCTV1_30MINNEWS_CMN_20080331_115901 776.356 782.591
+CCTV1_30MINNEWS_CMN_20080331_115901 782.591 792.498
+CCTV1_30MINNEWS_CMN_20080331_115901 798.516 808.045
+CCTV1_30MINNEWS_CMN_20080331_115901 808.045 817.280
+CCTV1_30MINNEWS_CMN_20080331_115901 821.077 826.984
+CCTV1_30MINNEWS_CMN_20080331_115901 826.984 833.734
+CCTV1_30MINNEWS_CMN_20080331_115901 833.734 851.468
+CCTV1_30MINNEWS_CMN_20080331_115901 851.468 862.796
+CCTV1_30MINNEWS_CMN_20080331_115901 862.796 866.515
+CCTV1_30MINNEWS_CMN_20080331_115901 873.765 881.358
+CCTV1_30MINNEWS_CMN_20080331_115901 881.358 886.076
+CCTV1_30MINNEWS_CMN_20080331_115901 901.453 909.078
+CCTV1_30MINNEWS_CMN_20080331_115901 909.078 914.546
+CCTV1_30MINNEWS_CMN_20080331_115901 914.546 928.921
+CCTV1_30MINNEWS_CMN_20080331_115901 928.921 932.890
+CCTV1_30MINNEWS_CMN_20080331_115901 932.890 944.984
+CCTV1_30MINNEWS_CMN_20080331_115901 944.984 955.482
+CCTV1_30MINNEWS_CMN_20080331_115901 955.482 963.498
+CCTV1_30MINNEWS_CMN_20080331_115901 963.498 967.934
+CCTV1_30MINNEWS_CMN_20080331_115901 981.646 989.476
+CCTV1_30MINNEWS_CMN_20080331_115901 989.476 994.055
+CCTV1_30MINNEWS_CMN_20080331_115901 994.055 999.685
+CCTV1_30MINNEWS_CMN_20080331_115901 999.685 1002.986
+CCTV1_30MINNEWS_CMN_20080331_115901 1002.986 1007.358
+CCTV1_30MINNEWS_CMN_20080331_115901 1022.343 1024.796
+CCTV1_30MINNEWS_CMN_20080331_115901 1024.796 1033.076
+CCTV1_30MINNEWS_CMN_20080331_115901 1033.076 1038.904
+CCTV1_30MINNEWS_CMN_20080331_115901 1170.476 1172.413
+CCTV1_30MINNEWS_CMN_20080331_115901 1172.413 1185.274
+CCTV1_30MINNEWS_CMN_20080331_115901 1185.274 1197.461
+CCTV1_30MINNEWS_CMN_20080331_115901 1197.461 1199.852
+CCTV1_30MINNEWS_CMN_20080331_115901 1199.852 1211.524
+CCTV1_30MINNEWS_CMN_20080331_115901 1211.524 1216.060
+CCTV1_30MINNEWS_CMN_20080331_115901 1216.060 1224.138
+CCTV1_30MINNEWS_CMN_20080331_115901 1224.138 1229.075
+CCTV1_30MINNEWS_CMN_20080331_115901 1238.449 1247.293
+CCTV1_30MINNEWS_CMN_20080331_115901 1247.293 1252.653
+CCTV1_30MINNEWS_CMN_20080331_115901 1252.653 1255.934
+CCTV1_30MINNEWS_CMN_20080331_115901 1255.934 1258.209
+CCTV1_30MINNEWS_CMN_20080331_115901 1258.209 1260.319
+CCTV1_30MINNEWS_CMN_20080331_115901 1260.319 1262.084
+CCTV1_30MINNEWS_CMN_20080331_115901 1262.084 1263.787
+CCTV1_30MINNEWS_CMN_20080331_115901 1263.787 1265.037
+CCTV1_30MINNEWS_CMN_20080331_115901 1265.037 1266.787
+CCTV1_30MINNEWS_CMN_20080331_115901 1266.787 1270.902
+CCTV1_30MINNEWS_CMN_20080331_115901 1270.902 1274.011
+CCTV1_30MINNEWS_CMN_20080331_115901 1274.011 1286.870
+CCTV1_30MINNEWS_CMN_20080331_115901 1286.870 1288.917
+CCTV1_30MINNEWS_CMN_20080331_115901 1288.917 1289.433
+CCTV1_30MINNEWS_CMN_20080331_115901 1289.433 1292.465
+CCTV1_30MINNEWS_CMN_20080331_115901 1292.465 1294.265
+CCTV1_30MINNEWS_CMN_20080331_115901 1294.265 1295.437
+CCTV1_30MINNEWS_CMN_20080331_115901 1295.437 1296.327
+CCTV1_30MINNEWS_CMN_20080331_115901 1296.327 1297.296
+CCTV1_30MINNEWS_CMN_20080331_115901 1297.296 1299.422
+CCTV1_30MINNEWS_CMN_20080331_115901 1299.422 1311.845
+CCTV1_30MINNEWS_CMN_20080331_115901 1311.845 1315.578
+CCTV1_30MINNEWS_CMN_20080331_115901 1315.578 1319.797
+CCTV1_30MINNEWS_CMN_20080331_115901 1319.797 1320.765
+CCTV1_30MINNEWS_CMN_20080331_115901 1319.797 1320.765
+CCTV1_30MINNEWS_CMN_20080331_115901 1320.765 1332.406
+CCTV1_30MINNEWS_CMN_20080331_115901 1320.765 1332.406
+CCTV1_30MINNEWS_CMN_20080331_115901 1332.406 1337.515
+CCTV1_30MINNEWS_CMN_20080331_115901 1337.515 1341.140
+CCTV1_30MINNEWS_CMN_20080331_115901 1343.702 1345.577
+CCTV1_30MINNEWS_CMN_20080331_115901 1345.577 1348.954
+CCTV1_30MINNEWS_CMN_20080331_115901 1351.329 1353.640
+CCTV1_30MINNEWS_CMN_20080331_115901 1353.640 1355.233
+CCTV1_30MINNEWS_CMN_20080331_115901 1355.233 1368.092
+CCTV1_30MINNEWS_CMN_20080331_115901 1368.092 1375.092
+CCTV1_30MINNEWS_CMN_20080331_115901 1375.092 1383.467
+CCTV1_30MINNEWS_CMN_20080331_115901 1393.358 1397.264
+CCTV1_30MINNEWS_CMN_20080331_115901 1397.264 1400.952
+CCTV1_30MINNEWS_CMN_20080331_115901 1400.952 1404.608
+CCTV1_30MINNEWS_CMN_20080331_115901 1404.608 1407.062
+CCTV1_30MINNEWS_CMN_20080331_115901 1407.062 1417.687
+CCTV1_30MINNEWS_CMN_20080331_115901 1417.687 1428.921
+CCTV1_30MINNEWS_CMN_20080331_115901 1428.921 1443.156
+CCTV1_30MINNEWS_CMN_20080331_115901 1443.156 1461.169
+CCTV1_30MINNEWS_CMN_20080331_115901 1461.169 1474.654
+CCTV1_30MINNEWS_CMN_20080331_115901 1489.887 1498.260
+CCTV1_30MINNEWS_CMN_20080331_115901 1498.260 1504.838
+CCTV1_30MINNEWS_CMN_20080331_115901 1504.838 1519.034
+CCTV1_30MINNEWS_CMN_20080331_115901 1519.034 1535.248
+CCTV1_30MINNEWS_CMN_20080331_115901 1547.687 1549.578
+CCTV1_30MINNEWS_CMN_20080331_115901 1549.578 1554.985
+CCTV1_30MINNEWS_CMN_20080331_115901 1561.313 1566.219
+CCTV1_30MINNEWS_CMN_20080331_115901 1566.219 1570.172
+CCTV1_30MINNEWS_CMN_20080331_115901 1570.172 1581.719
+CCTV1_30MINNEWS_CMN_20080331_115901 1581.719 1588.672
+CCTV1_30MINNEWS_CMN_20080331_115901 1588.672 1601.765
+CCTV1_30MINNEWS_CMN_20080331_115901 1601.765 1608.937
+CCTV1_30MINNEWS_CMN_20080331_115901 1608.937 1616.234
+CCTV1_30MINNEWS_CMN_20080331_115901 1616.234 1625.828
+CCTV1_30MINNEWS_CMN_20080331_115901 1846.629 1848.832
+CCTV1_30MINNEWS_CMN_20080331_115901 1850.125 1850.781
+VOA_INTNLNEWS_CMN_20080405_210000 241.323 247.150
+VOA_INTNLNEWS_CMN_20080405_210000 247.150 251.744
+VOA_INTNLNEWS_CMN_20080405_210000 251.744 255.400
+VOA_INTNLNEWS_CMN_20080405_210000 255.400 257.431
+VOA_INTNLNEWS_CMN_20080405_210000 257.431 267.384
+VOA_INTNLNEWS_CMN_20080405_210000 278.525 287.353
+VOA_INTNLNEWS_CMN_20080405_210000 300.697 314.009
+VOA_INTNLNEWS_CMN_20080405_210000 314.009 324.435
+VOA_INTNLNEWS_CMN_20080405_210000 324.435 336.433
+VOA_INTNLNEWS_CMN_20080405_210000 345.128 356.003
+VOA_INTNLNEWS_CMN_20080405_210000 356.003 364.050
+VOA_INTNLNEWS_CMN_20080405_210000 388.740 394.638
+VOA_INTNLNEWS_CMN_20080405_210000 394.638 403.500
+VOA_INTNLNEWS_CMN_20080405_210000 410.141 417.594
+VOA_INTNLNEWS_CMN_20080405_210000 417.594 433.219
+VOA_INTNLNEWS_CMN_20080405_210000 433.219 444.904
+VOA_INTNLNEWS_CMN_20080405_210000 451.888 456.731
+VOA_INTNLNEWS_CMN_20080405_210000 456.731 465.545
+VOA_INTNLNEWS_CMN_20080405_210000 474.531 480.203
+VOA_INTNLNEWS_CMN_20080405_210000 480.203 492.173
+VOA_INTNLNEWS_CMN_20080405_210000 492.173 498.877
+VOA_INTNLNEWS_CMN_20080405_210000 508.627 515.190
+VOA_INTNLNEWS_CMN_20080405_210000 515.190 526.337
+VOA_INTNLNEWS_CMN_20080405_210000 533.728 537.589
+VOA_INTNLNEWS_CMN_20080405_210000 537.589 541.542
+VOA_INTNLNEWS_CMN_20080405_210000 541.542 546.667
+VOA_INTNLNEWS_CMN_20080405_210000 546.667 554.995
+VOA_INTNLNEWS_CMN_20080405_210000 554.995 561.245
+VOA_INTNLNEWS_CMN_20080405_210000 561.245 574.462
+VOA_INTNLNEWS_CMN_20080405_210000 574.462 580.541
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 194.217 196.091
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 224.759 232.647
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 232.647 236.478
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 236.478 246.296
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 246.296 257.193
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 257.193 263.645
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 306.465 318.667
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 334.963 345.449
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 345.449 357.893
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 357.893 366.346
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 374.234 381.156
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 381.156 394.735
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 394.735 403.516
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 403.516 407.391
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 407.391 414.399
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 429.306 434.618
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 434.618 442.602
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 442.602 452.473
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 452.473 456.395
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 456.395 467.000
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 476.077 481.061
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 494.779 501.368
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 501.368 511.505
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 511.505 513.373
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 513.373 515.929
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 515.929 517.534
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 515.929 517.534
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 520.732 537.795
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 520.732 537.795
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 556.955 563.067
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 572.161 591.108
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 591.108 601.233
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 605.483 614.831
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 614.831 617.626
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 617.626 618.724
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 618.724 634.905
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 634.905 641.994
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 641.994 648.898
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 648.898 658.165
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 658.165 663.817
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 663.817 672.755
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 692.757 708.900
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 708.900 716.876
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 716.876 722.007
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 716.876 722.007
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 722.007 736.251
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 722.007 736.251
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 753.188 759.204
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 759.204 769.884
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 769.884 779.588
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 779.588 790.806
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 790.806 808.312
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 858.909 877.177
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 877.177 880.099
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 880.099 886.446
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 886.446 894.568
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 902.255 913.428
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 918.992 933.258
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 938.398 941.008
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 941.008 948.540
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 948.540 952.509
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 984.493 994.446
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 994.446 1001.087
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1001.087 1007.149
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1013.260 1020.865
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1020.865 1026.865
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1026.865 1032.937
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1039.890 1050.215
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1059.761 1092.511
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1092.511 1102.448
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1102.448 1106.449
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1117.543 1127.884
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1142.950 1146.668
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1146.668 1161.809
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1161.809 1173.591
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1173.591 1178.669
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1178.669 1194.075
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1209.637 1211.934
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1211.934 1222.203
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1222.203 1234.531
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1242.343 1246.827
+CCTVNEWS_EVENINGNEWS_CMN_20080409_225701 1246.827 1255.561
+CCTV2_ECON30MIN_CMN_20080413_213502 208.213 211.369
+CCTV2_ECON30MIN_CMN_20080413_213502 211.369 218.374
+CCTV2_ECON30MIN_CMN_20080413_213502 233.129 242.004
+CCTV2_ECON30MIN_CMN_20080413_213502 242.004 246.754
+CCTV2_ECON30MIN_CMN_20080413_213502 246.754 264.056
+CCTV2_ECON30MIN_CMN_20080413_213502 273.068 288.756
+CCTV2_ECON30MIN_CMN_20080413_213502 298.741 312.650
+CCTV2_ECON30MIN_CMN_20080413_213502 312.650 326.290
+CCTV2_ECON30MIN_CMN_20080413_213502 326.290 336.873
+CCTV2_ECON30MIN_CMN_20080413_213502 346.589 355.948
+CCTV2_ECON30MIN_CMN_20080413_213502 362.400 371.540
+CCTV2_ECON30MIN_CMN_20080413_213502 371.540 386.955
+CCTV2_ECON30MIN_CMN_20080413_213502 405.161 421.808
+CCTV2_ECON30MIN_CMN_20080413_213502 421.808 430.292
+CCTV2_ECON30MIN_CMN_20080413_213502 660.711 672.368
+CCTV2_ECON30MIN_CMN_20080413_213502 672.368 682.375
+CCTV2_ECON30MIN_CMN_20080413_213502 682.375 688.956
+CCTV2_ECON30MIN_CMN_20080413_213502 688.956 698.549
+CCTV2_ECON30MIN_CMN_20080413_213502 720.636 732.885
+CCTV2_ECON30MIN_CMN_20080413_213502 732.885 743.069
+CCTV2_ECON30MIN_CMN_20080413_213502 743.069 756.537
+CCTV2_ECON30MIN_CMN_20080413_213502 756.537 768.318
+CCTV2_ECON30MIN_CMN_20080413_213502 768.318 775.506
+CCTV2_ECON30MIN_CMN_20080413_213502 795.163 806.256
+CCTV2_ECON30MIN_CMN_20080413_213502 806.256 821.639
+CCTV2_ECON30MIN_CMN_20080413_213502 821.639 831.932
+CCTV2_ECON30MIN_CMN_20080413_213502 831.932 837.499
+CCTV2_ECON30MIN_CMN_20080413_213502 854.461 868.133
+CCTV2_ECON30MIN_CMN_20080413_213502 906.759 914.539
+CCTV2_ECON30MIN_CMN_20080413_213502 941.805 949.137
+CCTV2_ECON30MIN_CMN_20080413_213502 959.950 967.309
+CCTV2_ECON30MIN_CMN_20080413_213502 967.309 973.907
+CCTV2_ECON30MIN_CMN_20080413_213502 973.907 981.032
+CCTV2_ECON30MIN_CMN_20080413_213502 981.032 991.667
+CCTV2_ECON30MIN_CMN_20080413_213502 1002.235 1019.177
+CCTV2_ECON30MIN_CMN_20080413_213502 1019.177 1024.302
+CCTV2_ECON30MIN_CMN_20080413_213502 1047.938 1061.093
+CCTV2_ECON30MIN_CMN_20080413_213502 1061.093 1073.890
+CCTV2_ECON30MIN_CMN_20080413_213502 1073.890 1094.129
+CCTV2_ECON30MIN_CMN_20080413_213502 1094.129 1108.023
+CCTV2_ECON30MIN_CMN_20080413_213502 1264.879 1276.133
+CCTV2_ECON30MIN_CMN_20080413_213502 1276.133 1287.357
+CCTV2_ECON30MIN_CMN_20080413_213502 1287.357 1301.355
+CCTV2_ECON30MIN_CMN_20080413_213502 1329.068 1334.870
+CCTV2_ECON30MIN_CMN_20080413_213502 1361.145 1373.848
+CCTV2_ECON30MIN_CMN_20080413_213502 1390.971 1397.675
+CCTV2_ECON30MIN_CMN_20080413_213502 1397.675 1415.110
+CCTV2_ECON30MIN_CMN_20080413_213502 1415.110 1424.236
+CCTV2_ECON30MIN_CMN_20080413_213502 1424.236 1438.064
+CCTV2_ECON30MIN_CMN_20080413_213502 1456.827 1464.000
+CCTV2_ECON30MIN_CMN_20080413_213502 1464.000 1486.858
+CCTV2_ECON30MIN_CMN_20080413_213502 1486.858 1499.305
+CCTV2_ECON30MIN_CMN_20080413_213502 1499.305 1515.107
+CCTV2_ECON30MIN_CMN_20080413_213502 1515.107 1520.123
+CCTV2_ECON30MIN_CMN_20080413_213502 1520.123 1530.536
+CCTV2_ECON30MIN_CMN_20080413_213502 1530.536 1540.050
+CCTV2_ECON30MIN_CMN_20080413_213502 1540.050 1545.190
+CCTV2_ECON30MIN_CMN_20080413_213502 1566.258 1575.446
+CCTV2_ECON30MIN_CMN_20080413_213502 1575.446 1581.649
+CCTV2_ECON30MIN_CMN_20080413_213502 1581.649 1587.055
+CCTV2_ECON30MIN_CMN_20080413_213502 1587.055 1596.101
+CCTV2_ECON30MIN_CMN_20080413_213502 1596.101 1609.832
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 41.494 55.430
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 63.633 70.334
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 70.334 82.217
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 85.069 86.918
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 86.918 104.433
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 104.433 122.361
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 122.361 139.830
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 139.830 161.504
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 182.247 190.984
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 214.840 233.859
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 233.859 252.835
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 252.835 271.994
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 271.994 280.401
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 280.401 294.922
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 294.922 309.979
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 309.979 321.865
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 321.865 333.689
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 360.506 369.039
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 386.794 410.411
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 410.411 423.219
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 423.219 431.124
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 431.124 444.651
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 444.651 454.196
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 454.196 476.136
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 476.136 484.636
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 484.636 498.139
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 498.139 512.507
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 512.507 520.292
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 520.292 541.753
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 541.753 554.984
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 577.799 583.837
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 583.837 592.310
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 631.139 636.376
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 636.376 648.503
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 648.503 665.774
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 683.499 693.733
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 710.185 723.930
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 741.368 754.566
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 754.566 770.758
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 770.758 788.656
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 832.151 840.624
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 867.918 877.762
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 877.762 894.080
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 894.080 909.343
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 909.343 918.295
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 918.295 927.038
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 927.038 938.559
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 938.559 949.571
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 997.048 1006.323
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1006.323 1016.137
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1016.137 1023.473
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1048.644 1059.284
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1059.284 1062.182
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1062.182 1079.547
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1079.547 1093.676
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1093.676 1112.846
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1112.846 1128.445
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1128.445 1140.062
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1140.062 1142.520
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1224.588 1240.750
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1240.750 1254.433
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1254.433 1261.433
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1307.295 1314.985
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1336.393 1351.139
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1398.387 1404.010
+CCTV7_MILITARYNEWS1_CMN_20080407_100502 1398.387 1404.010
+CCTV1_30MINNEWS_CMN_20080412_115901 80.926 83.940
+CCTV1_30MINNEWS_CMN_20080412_115901 83.940 86.956
+CCTV1_30MINNEWS_CMN_20080412_115901 86.956 95.722
+CCTV1_30MINNEWS_CMN_20080412_115901 95.722 102.331
+CCTV1_30MINNEWS_CMN_20080412_115901 102.331 104.050
+CCTV1_30MINNEWS_CMN_20080412_115901 104.050 104.738
+CCTV1_30MINNEWS_CMN_20080412_115901 108.866 109.679
+CCTV1_30MINNEWS_CMN_20080412_115901 109.679 112.710
+CCTV1_30MINNEWS_CMN_20080412_115901 112.710 119.007
+CCTV1_30MINNEWS_CMN_20080412_115901 126.272 129.179
+CCTV1_30MINNEWS_CMN_20080412_115901 129.179 133.695
+CCTV1_30MINNEWS_CMN_20080412_115901 133.695 145.680
+CCTV1_30MINNEWS_CMN_20080412_115901 163.445 174.538
+CCTV1_30MINNEWS_CMN_20080412_115901 174.538 184.553
+CCTV1_30MINNEWS_CMN_20080412_115901 184.553 197.568
+CCTV1_30MINNEWS_CMN_20080412_115901 197.568 205.833
+CCTV1_30MINNEWS_CMN_20080412_115901 205.833 213.754
+CCTV1_30MINNEWS_CMN_20080412_115901 213.754 218.723
+CCTV1_30MINNEWS_CMN_20080412_115901 218.723 225.676
+CCTV1_30MINNEWS_CMN_20080412_115901 225.676 233.880
+CCTV1_30MINNEWS_CMN_20080412_115901 233.880 253.645
+CCTV1_30MINNEWS_CMN_20080412_115901 253.645 263.879
+CCTV1_30MINNEWS_CMN_20080412_115901 263.879 271.129
+CCTV1_30MINNEWS_CMN_20080412_115901 271.129 282.957
+CCTV1_30MINNEWS_CMN_20080412_115901 319.911 330.457
+CCTV1_30MINNEWS_CMN_20080412_115901 330.457 338.597
+CCTV1_30MINNEWS_CMN_20080412_115901 345.644 349.644
+CCTV1_30MINNEWS_CMN_20080412_115901 349.644 360.925
+CCTV1_30MINNEWS_CMN_20080412_115901 360.925 367.784
+CCTV1_30MINNEWS_CMN_20080412_115901 367.784 371.175
+CCTV1_30MINNEWS_CMN_20080412_115901 371.175 378.488
+CCTV1_30MINNEWS_CMN_20080412_115901 378.488 386.160
+CCTV1_30MINNEWS_CMN_20080412_115901 386.160 397.328
+CCTV1_30MINNEWS_CMN_20080412_115901 397.328 401.468
+CCTV1_30MINNEWS_CMN_20080412_115901 401.468 408.671
+CCTV1_30MINNEWS_CMN_20080412_115901 408.671 424.587
+CCTV1_30MINNEWS_CMN_20080412_115901 431.477 441.383
+CCTV1_30MINNEWS_CMN_20080412_115901 441.383 446.402
+CCTV1_30MINNEWS_CMN_20080412_115901 446.402 458.857
+CCTV1_30MINNEWS_CMN_20080412_115901 458.857 472.053
+CCTV1_30MINNEWS_CMN_20080412_115901 481.927 496.896
+CCTV1_30MINNEWS_CMN_20080412_115901 506.767 513.564
+CCTV1_30MINNEWS_CMN_20080412_115901 513.564 523.861
+CCTV1_30MINNEWS_CMN_20080412_115901 571.548 580.423
+CCTV1_30MINNEWS_CMN_20080412_115901 580.423 605.876
+CCTV1_30MINNEWS_CMN_20080412_115901 615.611 627.142
+CCTV1_30MINNEWS_CMN_20080412_115901 627.142 635.502
+CCTV1_30MINNEWS_CMN_20080412_115901 649.205 658.799
+CCTV1_30MINNEWS_CMN_20080412_115901 658.799 676.893
+CCTV1_30MINNEWS_CMN_20080412_115901 685.096 702.752
+CCTV1_30MINNEWS_CMN_20080412_115901 702.752 715.221
+CCTV1_30MINNEWS_CMN_20080412_115901 715.221 727.174
+CCTV1_30MINNEWS_CMN_20080412_115901 745.080 772.361
+CCTV1_30MINNEWS_CMN_20080412_115901 791.486 795.361
+CCTV1_30MINNEWS_CMN_20080412_115901 833.033 851.267
+CCTV1_30MINNEWS_CMN_20080412_115901 851.267 875.314
+CCTV1_30MINNEWS_CMN_20080412_115901 875.314 886.939
+CCTV1_30MINNEWS_CMN_20080412_115901 912.139 917.780
+CCTV1_30MINNEWS_CMN_20080412_115901 917.780 929.780
+CCTV1_30MINNEWS_CMN_20080412_115901 933.608 949.881
+CCTV1_30MINNEWS_CMN_20080412_115901 958.408 965.627
+CCTV1_30MINNEWS_CMN_20080412_115901 965.627 976.409
+CCTV1_30MINNEWS_CMN_20080412_115901 976.409 984.925
+CCTV1_30MINNEWS_CMN_20080412_115901 984.925 995.519
+CCTV1_30MINNEWS_CMN_20080412_115901 1000.409 1005.034
+CCTV1_30MINNEWS_CMN_20080412_115901 1005.034 1018.659
+CCTV1_30MINNEWS_CMN_20080412_115901 1018.659 1030.800
+CCTV1_30MINNEWS_CMN_20080412_115901 1045.160 1051.316
+CCTV1_30MINNEWS_CMN_20080412_115901 1051.316 1059.441
+CCTV1_30MINNEWS_CMN_20080412_115901 1089.312 1101.710
+CCTV1_30MINNEWS_CMN_20080412_115901 1101.710 1112.179
+CCTV1_30MINNEWS_CMN_20080412_115901 1112.179 1118.024
+CCTV1_30MINNEWS_CMN_20080412_115901 1124.539 1147.571
+CCTV1_30MINNEWS_CMN_20080412_115901 1147.571 1158.104
+CCTV1_30MINNEWS_CMN_20080412_115901 1158.104 1171.181
+CCTV1_30MINNEWS_CMN_20080412_115901 1171.181 1180.818
+CCTV1_30MINNEWS_CMN_20080412_115901 1193.083 1197.263
+CCTV1_30MINNEWS_CMN_20080412_115901 1197.263 1204.529
+CCTV1_30MINNEWS_CMN_20080412_115901 1204.529 1218.545
+CCTV1_30MINNEWS_CMN_20080412_115901 1218.545 1231.561
+CCTV1_30MINNEWS_CMN_20080412_115901 1252.429 1260.523
+CCTV1_30MINNEWS_CMN_20080412_115901 1272.023 1275.383
+CCTV1_30MINNEWS_CMN_20080412_115901 1298.226 1305.149
+CCTV1_30MINNEWS_CMN_20080412_115901 1305.149 1316.836
+CCTV1_30MINNEWS_CMN_20080412_115901 1323.367 1332.101
+CCTV1_30MINNEWS_CMN_20080412_115901 1332.101 1342.210
+CCTV1_30MINNEWS_CMN_20080412_115901 1342.210 1349.007
+CCTV1_30MINNEWS_CMN_20080412_115901 1366.695 1382.445
+CCTV1_30MINNEWS_CMN_20080412_115901 1540.370 1546.854
+CCTV1_30MINNEWS_CMN_20080412_115901 1546.854 1550.791
+CCTV1_30MINNEWS_CMN_20080412_115901 1550.791 1561.557
+CCTV1_30MINNEWS_CMN_20080412_115901 1568.588 1571.698
+CCTV1_30MINNEWS_CMN_20080412_115901 1571.698 1580.170
+CCTV1_30MINNEWS_CMN_20080412_115901 1580.170 1588.310
+CCTV1_30MINNEWS_CMN_20080412_115901 1588.310 1600.342
+CCTV1_30MINNEWS_CMN_20080412_115901 1600.342 1610.046
+CCTV1_30MINNEWS_CMN_20080412_115901 1610.046 1613.703
+CCTV1_30MINNEWS_CMN_20080412_115901 1619.016 1623.672
+CCTV1_30MINNEWS_CMN_20080412_115901 1623.672 1629.328
+CCTV1_30MINNEWS_CMN_20080412_115901 1629.328 1637.673
+CCTV1_30MINNEWS_CMN_20080412_115901 1637.673 1648.391
+CCTV1_30MINNEWS_CMN_20080412_115901 1854.095 1856.252
+CCTV1_30MINNEWS_CMN_20080412_115901 1856.252 1857.267
+CCTV1_30MINNEWS_CMN_20080412_115901 1857.267 1857.783
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 0.000 6.209
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 6.209 14.137
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 14.137 25.409
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 56.230 67.279
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 67.279 75.786
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 75.786 91.538
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 91.538 97.838
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 107.527 112.089
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 112.089 117.851
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 177.413 187.973
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 187.973 197.117
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 197.117 206.431
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 216.552 222.820
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 232.880 244.537
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 244.537 257.031
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 257.031 271.126
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 271.126 277.080
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 277.080 284.303
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 284.303 292.725
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 292.725 299.345
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 299.345 307.728
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 307.728 316.964
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 330.867 339.701
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 339.701 348.106
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 383.836 394.883
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 394.883 403.519
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 403.519 411.191
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 417.562 422.476
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 422.476 428.471
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 428.471 434.140
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 439.906 446.165
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 460.084 471.696
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 471.696 483.546
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 489.277 492.839
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 492.839 496.932
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 496.932 499.181
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 506.400 510.542
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 510.542 522.075
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 532.889 543.500
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 553.763 561.616
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 561.616 565.194
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 565.194 576.406
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 576.406 582.604
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 582.604 585.370
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 585.370 590.525
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 590.525 594.384
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 594.384 600.650
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 600.650 605.541
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 605.541 612.902
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 612.902 627.800
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 648.252 660.430
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 660.430 666.944
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 666.944 680.285
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 680.285 685.957
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 685.957 690.644
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 701.901 707.461
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 707.461 716.447
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 716.447 730.499
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 730.499 738.106
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 738.106 752.569
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 754.147 764.707
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 764.707 783.005
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 783.005 790.243
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 795.550 799.011
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 799.011 806.711
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 806.711 815.031
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 815.031 818.687
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 818.687 826.091
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 826.091 836.154
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 847.478 854.207
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 865.210 870.939
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 870.939 881.520
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 881.520 894.535
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 929.264 935.125
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 935.125 942.689
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 942.689 948.783
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 948.783 954.407
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 954.407 962.304
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 962.304 971.676
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 971.676 986.820
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 986.820 994.328
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 994.328 1001.110
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1001.110 1007.420
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1007.420 1015.874
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1015.874 1022.275
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1022.275 1028.525
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1028.525 1032.088
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1032.088 1037.024
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1037.024 1047.630
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1052.618 1059.477
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1059.477 1063.976
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1063.976 1070.377
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1077.798 1086.407
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1100.922 1114.396
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1114.396 1125.215
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1125.215 1129.683
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1129.683 1131.987
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1136.777 1140.322
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1140.322 1152.498
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1162.401 1170.247
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1170.247 1189.349
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1189.349 1196.433
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1196.433 1205.715
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1205.715 1212.479
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1212.479 1223.854
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1223.854 1230.425
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1230.425 1237.377
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1237.377 1244.424
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1244.424 1254.561
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1254.561 1265.790
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1265.790 1270.665
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1277.494 1280.134
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1289.368 1292.259
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1292.259 1298.733
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1298.733 1302.608
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1302.608 1316.293
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1316.293 1326.167
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1333.263 1338.404
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1338.404 1346.823
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1352.621 1356.664
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1356.664 1363.276
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1363.276 1372.490
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1372.490 1379.177
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1379.177 1383.066
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1383.066 1387.386
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1387.386 1397.544
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1404.185 1414.703
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1414.703 1422.074
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1422.074 1431.980
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1431.980 1441.341
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1441.341 1447.137
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1447.137 1459.038
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1459.038 1475.083
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1475.083 1483.076
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1493.375 1497.437
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1514.216 1524.013
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1524.013 1527.857
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1527.857 1539.279
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1565.558 1577.942
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1622.428 1632.069
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1632.069 1638.460
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1638.460 1650.273
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1650.273 1655.414
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1667.326 1674.123
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1674.123 1679.279
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1679.279 1688.012
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1688.012 1698.197
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1719.824 1726.402
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1726.402 1737.182
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1737.182 1748.083
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1748.083 1756.557
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1756.557 1766.777
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1775.551 1788.988
+VOA_INTNLNEWSFINANCE_CMN_20080415_100000 1860.292 1864.465
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 229.180 237.415
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 243.947 254.619
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 264.244 276.026
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 288.369 294.651
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 308.307 313.432
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 313.432 320.619
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 320.619 329.103
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 332.337 344.759
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 344.759 354.103
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 364.118 375.884
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 375.884 385.353
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 406.227 413.322
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 413.322 424.729
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 424.729 433.920
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 452.153 460.502
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 460.502 466.299
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 466.299 469.565
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 469.565 474.753
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 474.753 488.612
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 488.612 498.597
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 498.597 504.144
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 504.144 508.097
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 525.810 533.163
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 533.163 537.476
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 537.476 546.600
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 546.600 553.570
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 560.402 568.074
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 580.003 582.206
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 590.860 595.842
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 595.842 609.405
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 609.405 620.888
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 620.888 632.544
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 632.544 642.998
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 642.998 653.451
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 653.451 667.795
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 687.561 695.092
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 695.092 699.733
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 699.733 715.342
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 730.780 744.987
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 744.987 755.440
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 796.736 800.174
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 800.174 817.909
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 817.909 824.753
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 824.753 836.908
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 836.908 846.471
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 846.471 861.268
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 861.268 874.314
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 884.834 888.397
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 888.397 895.982
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 895.982 899.138
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 899.138 903.748
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 903.748 913.935
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 913.935 919.154
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 919.154 922.607
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 922.607 929.652
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 929.652 934.232
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 934.232 943.607
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 943.607 950.935
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 950.935 957.185
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 957.185 973.558
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 973.558 977.152
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 977.152 980.980
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 985.996 989.731
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 989.731 998.090
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 998.090 1007.763
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1007.763 1009.638
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1009.638 1021.888
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1021.888 1033.873
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1033.873 1045.576
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1045.576 1050.295
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1086.728 1096.728
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1096.728 1103.462
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1103.462 1109.239
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1109.239 1118.910
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1118.910 1129.004
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1129.004 1142.035
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1142.035 1155.051
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1168.342 1171.998
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1171.998 1182.890
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1182.890 1191.624
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1191.624 1195.624
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1195.624 1199.534
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1199.534 1209.394
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1212.972 1218.441
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1218.441 1235.176
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1235.176 1241.364
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1248.768 1255.424
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1255.424 1266.361
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1266.361 1276.876
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1276.876 1292.220
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1292.220 1307.377
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1322.478 1328.166
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1328.166 1337.447
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1337.447 1345.728
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1345.728 1351.729
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1351.729 1358.275
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1379.307 1385.697
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1385.697 1390.916
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1390.916 1398.745
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1398.745 1412.479
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1412.479 1425.304
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1425.304 1440.275
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1448.205 1452.111
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1452.111 1457.689
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1457.689 1461.111
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1461.111 1476.548
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1476.548 1485.204
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1491.967 1499.498
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1499.498 1506.389
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1506.389 1520.295
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1520.295 1527.420
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1547.699 1551.184
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1551.184 1561.871
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1561.871 1568.934
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1568.934 1572.355
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1575.449 1579.027
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1579.027 1587.698
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1587.698 1604.995
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1604.995 1613.573
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1613.573 1621.761
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1621.761 1632.432
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1632.432 1641.151
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1641.151 1646.135
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1646.135 1658.603
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1673.385 1681.588
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1681.588 1686.761
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1686.761 1703.323
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1753.838 1762.369
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1786.430 1800.117
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1809.304 1821.382
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1830.975 1835.415
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1844.977 1853.587
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1853.587 1863.275
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1863.275 1871.963
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1883.900 1904.103
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1904.103 1914.353
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1914.353 1922.291
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1922.291 1935.119
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1935.119 1948.479
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1948.479 1950.854
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1967.191 1969.659
+VOA_INTNLNEWSFINANCE_CMN_20080411_100000 1969.659 1972.737
+CCTV2_ECON30MIN_CMN_20080423_213501 257.111 259.656
+CCTV2_ECON30MIN_CMN_20080423_213501 270.987 293.998
+CCTV2_ECON30MIN_CMN_20080423_213501 293.998 295.295
+CCTV2_ECON30MIN_CMN_20080423_213501 295.295 314.875
+CCTV2_ECON30MIN_CMN_20080423_213501 314.875 329.497
+CCTV2_ECON30MIN_CMN_20080423_213501 329.497 338.712
+CCTV2_ECON30MIN_CMN_20080423_213501 356.732 361.929
+CCTV2_ECON30MIN_CMN_20080423_213501 426.477 440.252
+CCTV2_ECON30MIN_CMN_20080423_213501 440.252 456.741
+CCTV2_ECON30MIN_CMN_20080423_213501 456.741 469.049
+CCTV2_ECON30MIN_CMN_20080423_213501 469.049 475.314
+CCTV2_ECON30MIN_CMN_20080423_213501 508.324 525.129
+CCTV2_ECON30MIN_CMN_20080423_213501 525.129 537.675
+CCTV2_ECON30MIN_CMN_20080423_213501 553.289 558.181
+CCTV2_ECON30MIN_CMN_20080423_213501 572.881 587.284
+CCTV2_ECON30MIN_CMN_20080423_213501 817.822 832.679
+CCTV2_ECON30MIN_CMN_20080423_213501 832.679 841.440
+CCTV2_ECON30MIN_CMN_20080423_213501 841.440 847.785
+CCTV2_ECON30MIN_CMN_20080423_213501 890.420 901.191
+CCTV2_ECON30MIN_CMN_20080423_213501 901.191 912.491
+CCTV2_ECON30MIN_CMN_20080423_213501 912.491 918.909
+CCTV2_ECON30MIN_CMN_20080423_213501 918.909 930.183
+CCTV2_ECON30MIN_CMN_20080423_213501 930.183 937.214
+CCTV2_ECON30MIN_CMN_20080423_213501 937.214 953.527
+CCTV2_ECON30MIN_CMN_20080423_213501 968.529 986.358
+CCTV2_ECON30MIN_CMN_20080423_213501 986.358 994.568
+CCTV2_ECON30MIN_CMN_20080423_213501 1028.755 1038.023
+CCTV2_ECON30MIN_CMN_20080423_213501 1045.818 1057.145
+CCTV2_ECON30MIN_CMN_20080423_213501 1057.145 1078.709
+CCTV2_ECON30MIN_CMN_20080423_213501 1078.709 1097.516
+CCTV2_ECON30MIN_CMN_20080423_213501 1114.016 1121.094
+CCTV2_ECON30MIN_CMN_20080423_213501 1324.918 1340.824
+CCTV2_ECON30MIN_CMN_20080423_213501 1340.824 1353.066
+CCTV2_ECON30MIN_CMN_20080423_213501 1353.066 1359.097
+CCTV2_ECON30MIN_CMN_20080423_213501 1391.998 1399.365
+CCTV2_ECON30MIN_CMN_20080423_213501 1399.365 1406.887
+CCTV2_ECON30MIN_CMN_20080423_213501 1406.887 1410.143
+CCTV2_ECON30MIN_CMN_20080423_213501 1410.143 1418.096
+CCTV2_ECON30MIN_CMN_20080423_213501 1418.096 1428.893
+CCTV2_ECON30MIN_CMN_20080423_213501 1428.893 1433.531
+CCTV2_ECON30MIN_CMN_20080423_213501 1456.409 1462.994
+CCTV2_ECON30MIN_CMN_20080423_213501 1462.994 1481.916
+CCTV2_ECON30MIN_CMN_20080423_213501 1481.916 1488.323
+CCTV2_ECON30MIN_CMN_20080423_213501 1505.084 1515.523
+CCTV2_ECON30MIN_CMN_20080423_213501 1515.523 1530.069
+CCTV2_ECON30MIN_CMN_20080423_213501 1530.069 1535.456
+CCTV2_ECON30MIN_CMN_20080423_213501 1554.411 1568.680
+CCTV2_ECON30MIN_CMN_20080423_213501 1568.680 1583.993
+CCTV2_ECON30MIN_CMN_20080423_213501 1583.993 1593.065
+CCTV2_ECON30MIN_CMN_20080423_213501 1593.065 1600.186
+CCTV2_ECON30MIN_CMN_20080423_213501 1600.186 1609.109
+CCTV2_ECON30MIN_CMN_20080423_213501 1609.109 1620.586
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 228.670 235.140
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 235.140 241.281
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 241.281 256.851
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 256.851 268.256
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 268.256 271.662
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 271.662 280.334
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 280.334 287.334
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 287.334 302.708
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 342.838 352.322
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 371.353 379.040
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 393.133 404.040
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 404.040 411.118
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 411.118 419.876
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 419.876 427.518
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 443.440 453.533
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 453.533 465.909
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 477.347 493.488
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 493.488 505.346
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 505.346 507.502
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 524.803 532.773
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 532.773 536.740
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 536.740 544.603
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 544.603 552.325
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 574.973 583.386
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 583.386 585.324
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 595.953 601.206
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 610.646 616.245
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 616.245 620.483
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 620.483 625.968
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 632.408 641.799
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 641.799 656.066
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 656.066 665.494
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 665.494 674.462
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 674.462 686.311
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 686.311 702.718
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 702.718 712.045
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 712.045 721.967
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 721.967 732.983
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 732.983 740.842
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 740.842 748.546
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 748.546 758.374
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 768.404 774.435
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 774.435 793.214
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 793.214 802.782
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 802.782 809.888
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 809.888 823.026
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 823.026 839.997
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 844.840 862.114
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 862.114 869.449
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 875.199 877.730
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 902.958 906.661
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 906.661 926.504
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 926.504 932.162
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 956.694 966.225
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 985.527 993.902
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 993.902 997.292
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1001.995 1009.291
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1018.187 1037.327
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1037.327 1045.826
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1056.277 1065.386
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1065.386 1068.602
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1082.352 1086.228
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1086.228 1096.932
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1096.932 1116.244
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1116.244 1121.573
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1121.573 1130.073
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1130.073 1136.079
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1149.807 1153.057
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1153.057 1168.401
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1171.636 1182.964
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1182.964 1192.762
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1206.058 1217.215
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1217.215 1227.840
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1246.206 1260.893
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1260.893 1270.159
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1270.159 1280.331
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1290.518 1301.143
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1319.558 1322.651
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1322.651 1328.870
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1328.870 1336.168
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1336.168 1345.808
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1349.683 1356.731
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1356.731 1374.168
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1374.168 1382.510
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1389.009 1392.665
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1392.665 1402.228
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1409.462 1412.602
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1423.993 1429.801
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1459.942 1475.678
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1475.678 1488.006
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1513.202 1526.216
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1526.216 1539.716
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1568.467 1585.357
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1597.434 1612.745
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1612.745 1621.478
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1632.400 1639.025
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1655.683 1658.557
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1667.837 1675.163
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1691.413 1695.117
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1704.339 1707.356
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1707.356 1716.528
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1716.528 1729.587
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1732.477 1744.962
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1751.024 1757.102
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1768.321 1774.070
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1774.070 1789.398
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1789.398 1794.399
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1806.649 1817.460
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1831.199 1841.760
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1871.079 1883.047
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1897.064 1914.168
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1914.168 1925.184
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1942.262 1945.615
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1951.428 1959.475
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1959.475 1970.481
+VOA_INTNLNEWSFINANCE_CMN_20080403_100000 1970.481 1975.716
+CCTV2_ECON30MIN_CMN_20080410_213502 225.090 227.481
+CCTV2_ECON30MIN_CMN_20080410_213502 227.481 235.622
+CCTV2_ECON30MIN_CMN_20080410_213502 240.606 249.762
+CCTV2_ECON30MIN_CMN_20080410_213502 257.513 265.842
+CCTV2_ECON30MIN_CMN_20080410_213502 265.842 272.701
+CCTV2_ECON30MIN_CMN_20080410_213502 272.701 277.638
+CCTV2_ECON30MIN_CMN_20080410_213502 277.638 285.185
+CCTV2_ECON30MIN_CMN_20080410_213502 285.185 290.951
+CCTV2_ECON30MIN_CMN_20080410_213502 311.701 317.263
+CCTV2_ECON30MIN_CMN_20080410_213502 317.263 327.200
+CCTV2_ECON30MIN_CMN_20080410_213502 327.200 335.013
+CCTV2_ECON30MIN_CMN_20080410_213502 335.013 340.045
+CCTV2_ECON30MIN_CMN_20080410_213502 340.045 347.374
+CCTV2_ECON30MIN_CMN_20080410_213502 347.374 358.265
+CCTV2_ECON30MIN_CMN_20080410_213502 365.786 369.505
+CCTV2_ECON30MIN_CMN_20080410_213502 369.505 374.177
+CCTV2_ECON30MIN_CMN_20080410_213502 388.365 393.114
+CCTV2_ECON30MIN_CMN_20080410_213502 393.114 401.630
+CCTV2_ECON30MIN_CMN_20080410_213502 401.630 406.193
+CCTV2_ECON30MIN_CMN_20080410_213502 406.193 414.770
+CCTV2_ECON30MIN_CMN_20080410_213502 414.770 426.552
+CCTV2_ECON30MIN_CMN_20080410_213502 426.552 433.099
+CCTV2_ECON30MIN_CMN_20080410_213502 433.099 443.490
+CCTV2_ECON30MIN_CMN_20080410_213502 462.037 477.474
+CCTV2_ECON30MIN_CMN_20080410_213502 492.693 513.350
+CCTV2_ECON30MIN_CMN_20080410_213502 696.780 698.296
+CCTV2_ECON30MIN_CMN_20080410_213502 698.296 710.390
+CCTV2_ECON30MIN_CMN_20080410_213502 710.390 719.952
+CCTV2_ECON30MIN_CMN_20080410_213502 719.952 723.077
+CCTV2_ECON30MIN_CMN_20080410_213502 734.647 742.085
+CCTV2_ECON30MIN_CMN_20080410_213502 752.757 765.305
+CCTV2_ECON30MIN_CMN_20080410_213502 765.305 774.820
+CCTV2_ECON30MIN_CMN_20080410_213502 774.820 788.883
+CCTV2_ECON30MIN_CMN_20080410_213502 803.226 809.507
+CCTV2_ECON30MIN_CMN_20080410_213502 809.507 827.668
+CCTV2_ECON30MIN_CMN_20080410_213502 827.668 838.527
+CCTV2_ECON30MIN_CMN_20080410_213502 838.527 857.199
+CCTV2_ECON30MIN_CMN_20080410_213502 857.199 871.230
+CCTV2_ECON30MIN_CMN_20080410_213502 871.230 886.668
+CCTV2_ECON30MIN_CMN_20080410_213502 886.668 896.292
+CCTV2_ECON30MIN_CMN_20080410_213502 896.292 908.886
+CCTV2_ECON30MIN_CMN_20080410_213502 913.808 926.136
+CCTV2_ECON30MIN_CMN_20080410_213502 930.855 932.043
+CCTV2_ECON30MIN_CMN_20080410_213502 932.043 936.637
+CCTV2_ECON30MIN_CMN_20080410_213502 936.637 945.698
+CCTV2_ECON30MIN_CMN_20080410_213502 957.105 966.277
+CCTV2_ECON30MIN_CMN_20080410_213502 988.324 993.496
+CCTV2_ECON30MIN_CMN_20080410_213502 993.496 1001.808
+CCTV2_ECON30MIN_CMN_20080410_213502 1001.808 1005.512
+CCTV2_ECON30MIN_CMN_20080410_213502 1005.512 1018.247
+CCTV2_ECON30MIN_CMN_20080410_213502 1018.247 1034.600
+CCTV2_ECON30MIN_CMN_20080410_213502 1034.600 1035.960
+CCTV2_ECON30MIN_CMN_20080410_213502 1035.960 1037.491
+CCTV2_ECON30MIN_CMN_20080410_213502 1037.491 1038.444
+CCTV2_ECON30MIN_CMN_20080410_213502 1038.444 1045.928
+CCTV2_ECON30MIN_CMN_20080410_213502 1045.928 1055.772
+CCTV2_ECON30MIN_CMN_20080410_213502 1055.772 1059.084
+CCTV2_ECON30MIN_CMN_20080410_213502 1059.084 1063.834
+CCTV2_ECON30MIN_CMN_20080410_213502 1063.834 1074.537
+CCTV2_ECON30MIN_CMN_20080410_213502 1074.537 1088.068
+CCTV2_ECON30MIN_CMN_20080410_213502 1088.068 1093.545
+CCTV2_ECON30MIN_CMN_20080410_213502 1220.024 1221.727
+CCTV2_ECON30MIN_CMN_20080410_213502 1221.727 1231.145
+CCTV2_ECON30MIN_CMN_20080410_213502 1239.350 1241.569
+CCTV2_ECON30MIN_CMN_20080410_213502 1241.569 1253.335
+CCTV2_ECON30MIN_CMN_20080410_213502 1253.335 1259.333
+CCTV2_ECON30MIN_CMN_20080410_213502 1266.469 1275.360
+CCTV2_ECON30MIN_CMN_20080410_213502 1275.360 1280.281
+CCTV2_ECON30MIN_CMN_20080410_213502 1280.281 1286.203
+CCTV2_ECON30MIN_CMN_20080410_213502 1286.203 1297.750
+CCTV2_ECON30MIN_CMN_20080410_213502 1297.750 1309.984
+CCTV2_ECON30MIN_CMN_20080410_213502 1317.016 1328.313
+CCTV2_ECON30MIN_CMN_20080410_213502 1328.313 1334.734
+CCTV2_ECON30MIN_CMN_20080410_213502 1361.375 1367.984
+CCTV2_ECON30MIN_CMN_20080410_213502 1367.984 1378.327
+CCTV2_ECON30MIN_CMN_20080410_213502 1386.014 1402.983
+CCTV2_ECON30MIN_CMN_20080410_213502 1402.983 1418.014
+CCTV2_ECON30MIN_CMN_20080410_213502 1418.014 1422.592
+CCTV2_ECON30MIN_CMN_20080410_213502 1422.592 1428.530
+CCTV2_ECON30MIN_CMN_20080410_213502 1428.530 1436.826
+CCTV2_ECON30MIN_CMN_20080410_213502 1436.826 1451.029
+CCTV2_ECON30MIN_CMN_20080410_213502 1459.560 1465.950
+CCTV2_ECON30MIN_CMN_20080410_213502 1465.950 1478.440
+CCTV2_ECON30MIN_CMN_20080410_213502 1495.753 1503.222
+CCTV2_ECON30MIN_CMN_20080410_213502 1503.222 1519.346
+CCTV2_ECON30MIN_CMN_20080410_213502 1519.346 1525.048
+CCTV2_ECON30MIN_CMN_20080410_213502 1525.048 1531.063
+CCTV2_ECON30MIN_CMN_20080410_213502 1531.063 1541.047
+CCTV2_ECON30MIN_CMN_20080410_213502 1544.984 1558.626
+CCTV2_ECON30MIN_CMN_20080410_213502 1574.580 1581.252
+CCTV2_ECON30MIN_CMN_20080410_213502 1581.252 1598.721
+CCTV2_ECON30MIN_CMN_20080410_213502 1598.721 1601.393
+CCTV2_ECON30MIN_CMN_20080410_213502 1601.393 1616.362
+CCTV2_ECON30MIN_CMN_20080410_213502 1616.362 1620.664
+VOA_INTNLNEWS_CMN_20080414_210000 82.073 91.340
+VOA_INTNLNEWS_CMN_20080414_210000 91.340 104.121
+VOA_INTNLNEWS_CMN_20080414_210000 104.121 117.911
+VOA_INTNLNEWS_CMN_20080414_210000 117.911 128.005
+VOA_INTNLNEWS_CMN_20080414_210000 141.247 146.906
+VOA_INTNLNEWS_CMN_20080414_210000 241.538 249.738
+VOA_INTNLNEWS_CMN_20080414_210000 251.238 258.783
+VOA_INTNLNEWS_CMN_20080414_210000 258.783 268.638
+VOA_INTNLNEWS_CMN_20080414_210000 284.448 289.745
+VOA_INTNLNEWS_CMN_20080414_210000 289.745 292.152
+VOA_INTNLNEWS_CMN_20080414_210000 292.152 303.074
+VOA_INTNLNEWS_CMN_20080414_210000 311.325 314.435
+VOA_INTNLNEWS_CMN_20080414_210000 314.435 322.060
+VOA_INTNLNEWS_CMN_20080414_210000 322.060 329.721
+VOA_INTNLNEWS_CMN_20080414_210000 329.721 340.456
+VOA_INTNLNEWS_CMN_20080414_210000 340.456 349.638
+VOA_INTNLNEWS_CMN_20080414_210000 349.638 356.842
+VOA_INTNLNEWS_CMN_20080414_210000 356.842 364.437
+VOA_INTNLNEWS_CMN_20080414_210000 372.703 380.018
+VOA_INTNLNEWS_CMN_20080414_210000 395.253 401.985
+VOA_INTNLNEWS_CMN_20080414_210000 401.985 411.500
+VOA_INTNLNEWS_CMN_20080414_210000 422.194 429.763
+VOA_INTNLNEWS_CMN_20080414_210000 452.385 458.816
+VOA_INTNLNEWS_CMN_20080414_210000 458.816 465.895
+VOA_INTNLNEWS_CMN_20080414_210000 465.895 472.326
+VOA_INTNLNEWS_CMN_20080414_210000 472.326 481.544
+VOA_INTNLNEWS_CMN_20080414_210000 481.544 486.544
+VOA_INTNLNEWS_CMN_20080414_210000 486.544 497.197
+VOA_INTNLNEWS_CMN_20080414_210000 507.341 515.887
+VOA_INTNLNEWS_CMN_20080414_210000 515.887 520.481
+VOA_INTNLNEWS_CMN_20080414_210000 527.916 532.573
+VOA_INTNLNEWS_CMN_20080414_210000 532.573 536.504
+VOA_INTNLNEWS_CMN_20080414_210000 536.504 549.290
+VOA_INTNLNEWS_CMN_20080414_210000 549.290 558.026
+VOA_INTNLNEWS_CMN_20080414_210000 558.026 562.568
+VOA_INTNLNEWS_CMN_20080414_210000 562.568 566.506
+VOA_INTNLNEWS_CMN_20080414_210000 578.866 584.798
+VOA_INTNLNEWS_CMN_20080414_210000 590.454 593.891
+VOA_INTNLNEWS_CMN_20080414_210000 593.891 603.734
+VOA_INTNLNEWS_CMN_20080414_210000 620.629 628.771
+VOA_INTNLNEWS_CMN_20080414_210000 628.771 636.857
+VOA_INTNLNEWS_CMN_20080414_210000 644.956 650.628
+VOA_INTNLNEWS_CMN_20080414_210000 658.018 666.769
+VOA_INTNLNEWS_CMN_20080414_210000 666.769 672.722
+VOA_INTNLNEWS_CMN_20080414_210000 685.177 692.764
+VOA_INTNLNEWS_CMN_20080414_210000 692.764 702.571
+VOA_INTNLNEWS_CMN_20080414_210000 708.654 711.794
+VOA_INTNLNEWS_CMN_20080414_210000 711.794 717.701
+VOA_INTNLNEWS_CMN_20080414_210000 717.701 724.168
+VOA_INTNLNEWS_CMN_20080414_210000 724.168 734.327
+VOA_INTNLNEWS_CMN_20080414_210000 734.327 737.387
+VOA_INTNLNEWS_CMN_20080414_210000 737.387 744.859
+VOA_INTNLNEWS_CMN_20080414_210000 744.859 747.671
+VOA_INTNLNEWS_CMN_20080414_210000 747.671 753.470
+VOA_INTNLNEWS_CMN_20080414_210000 764.262 769.788
+VOA_INTNLNEWS_CMN_20080414_210000 769.788 777.624
+VOA_INTNLNEWS_CMN_20080414_210000 777.624 781.690
+VOA_INTNLNEWS_CMN_20080414_210000 788.307 795.221
+VOA_INTNLNEWS_CMN_20080414_210000 795.221 803.125
+VOA_INTNLNEWS_CMN_20080414_210000 803.125 811.720
+VOA_INTNLNEWS_CMN_20080414_210000 811.720 823.220
+VOA_INTNLNEWS_CMN_20080414_210000 825.376 833.180
+VOA_INTNLNEWS_CMN_20080414_210000 833.180 838.941
+VOA_INTNLNEWS_CMN_20080414_210000 846.844 852.814
+VOA_INTNLNEWS_CMN_20080414_210000 857.455 861.693
+VOA_INTNLNEWS_CMN_20080414_210000 861.693 870.299
+VOA_INTNLNEWS_CMN_20080414_210000 876.556 881.322
+VOA_INTNLNEWS_CMN_20080414_210000 881.322 888.547
+VOA_INTNLNEWS_CMN_20080414_210000 893.971 900.658
+VOA_INTNLNEWS_CMN_20080414_210000 900.658 906.502
+VOA_INTNLNEWS_CMN_20080414_210000 906.502 913.487
+VOA_INTNLNEWS_CMN_20080414_210000 913.487 925.228
+VOA_INTNLNEWS_CMN_20080414_210000 925.228 927.650
+VOA_INTNLNEWS_CMN_20080414_210000 939.893 941.348
+VOA_INTNLNEWS_CMN_20080414_210000 941.348 947.683
+VOA_INTNLNEWS_CMN_20080414_210000 947.683 955.816
+VOA_INTNLNEWS_CMN_20080414_210000 955.816 960.000
+CCTV2_ECON30MIN_CMN_20080412_213501 230.003 232.570
+CCTV2_ECON30MIN_CMN_20080412_213501 241.633 249.273
+CCTV2_ECON30MIN_CMN_20080412_213501 249.273 258.554
+CCTV2_ECON30MIN_CMN_20080412_213501 310.601 315.507
+CCTV2_ECON30MIN_CMN_20080412_213501 328.208 333.644
+CCTV2_ECON30MIN_CMN_20080412_213501 333.644 338.456
+CCTV2_ECON30MIN_CMN_20080412_213501 338.456 349.418
+CCTV2_ECON30MIN_CMN_20080412_213501 366.356 377.754
+CCTV2_ECON30MIN_CMN_20080412_213501 377.754 394.637
+CCTV2_ECON30MIN_CMN_20080412_213501 394.637 397.372
+CCTV2_ECON30MIN_CMN_20080412_213501 397.372 414.310
+CCTV2_ECON30MIN_CMN_20080412_213501 414.310 431.075
+CCTV2_ECON30MIN_CMN_20080412_213501 460.897 471.147
+CCTV2_ECON30MIN_CMN_20080412_213501 481.710 485.970
+CCTV2_ECON30MIN_CMN_20080412_213501 491.599 494.084
+CCTV2_ECON30MIN_CMN_20080412_213501 507.515 518.140
+CCTV2_ECON30MIN_CMN_20080412_213501 523.233 538.499
+CCTV2_ECON30MIN_CMN_20080412_213501 538.499 546.655
+CCTV2_ECON30MIN_CMN_20080412_213501 546.655 555.311
+CCTV2_ECON30MIN_CMN_20080412_213501 562.240 570.115
+CCTV2_ECON30MIN_CMN_20080412_213501 591.646 595.943
+CCTV2_ECON30MIN_CMN_20080412_213501 790.444 791.757
+CCTV2_ECON30MIN_CMN_20080412_213501 791.757 799.741
+CCTV2_ECON30MIN_CMN_20080412_213501 799.741 814.960
+CCTV2_ECON30MIN_CMN_20080412_213501 814.960 823.022
+CCTV2_ECON30MIN_CMN_20080412_213501 823.022 825.616
+CCTV2_ECON30MIN_CMN_20080412_213501 825.616 835.084
+CCTV2_ECON30MIN_CMN_20080412_213501 841.258 842.164
+CCTV2_ECON30MIN_CMN_20080412_213501 861.010 869.166
+CCTV2_ECON30MIN_CMN_20080412_213501 869.166 876.838
+CCTV2_ECON30MIN_CMN_20080412_213501 876.838 889.775
+CCTV2_ECON30MIN_CMN_20080412_213501 948.904 956.123
+CCTV2_ECON30MIN_CMN_20080412_213501 993.843 1001.155
+CCTV2_ECON30MIN_CMN_20080412_213501 1001.155 1015.952
+CCTV2_ECON30MIN_CMN_20080412_213501 1051.327 1056.874
+CCTV2_ECON30MIN_CMN_20080412_213501 1056.874 1061.968
+CCTV2_ECON30MIN_CMN_20080412_213501 1061.968 1072.672
+CCTV2_ECON30MIN_CMN_20080412_213501 1072.672 1077.703
+CCTV2_ECON30MIN_CMN_20080412_213501 1077.703 1087.063
+CCTV2_ECON30MIN_CMN_20080412_213501 1087.063 1099.782
+CCTV2_ECON30MIN_CMN_20080412_213501 1115.376 1118.423
+CCTV2_ECON30MIN_CMN_20080412_213501 1118.423 1125.585
+CCTV2_ECON30MIN_CMN_20080412_213501 1125.585 1130.366
+CCTV2_ECON30MIN_CMN_20080412_213501 1149.351 1158.899
+CCTV2_ECON30MIN_CMN_20080412_213501 1158.899 1174.383
+CCTV2_ECON30MIN_CMN_20080412_213501 1174.383 1189.883
+CCTV2_ECON30MIN_CMN_20080412_213501 1189.883 1199.320
+CCTV2_ECON30MIN_CMN_20080412_213501 1208.457 1211.988
+CCTV2_ECON30MIN_CMN_20080412_213501 1211.988 1222.581
+CCTV2_ECON30MIN_CMN_20080412_213501 1389.942 1391.270
+CCTV2_ECON30MIN_CMN_20080412_213501 1391.270 1405.864
+CCTV2_ECON30MIN_CMN_20080412_213501 1405.864 1426.352
+CCTV2_ECON30MIN_CMN_20080412_213501 1426.352 1430.414
+CCTV2_ECON30MIN_CMN_20080412_213501 1430.414 1442.632
+CCTV2_ECON30MIN_CMN_20080412_213501 1442.632 1451.585
+CCTV2_ECON30MIN_CMN_20080412_213501 1451.585 1460.022
+CCTV2_ECON30MIN_CMN_20080412_213501 1460.022 1473.334
+CCTV2_ECON30MIN_CMN_20080412_213501 1473.334 1484.584
+CCTV2_ECON30MIN_CMN_20080412_213501 1484.584 1488.506
+CCTV2_ECON30MIN_CMN_20080412_213501 1488.506 1501.132
+CCTV2_ECON30MIN_CMN_20080412_213501 1501.132 1508.052
+CCTV2_ECON30MIN_CMN_20080412_213501 1516.572 1520.182
+CCTV2_ECON30MIN_CMN_20080412_213501 1520.182 1530.340
+CCTV2_ECON30MIN_CMN_20080412_213501 1530.340 1544.714
+CCTV2_ECON30MIN_CMN_20080412_213501 1544.714 1552.714
+CCTV2_ECON30MIN_CMN_20080412_213501 1561.079 1576.573
+CCTV2_ECON30MIN_CMN_20080412_213501 1576.573 1590.170
+CCTV2_ECON30MIN_CMN_20080412_213501 1598.543 1611.565
+CCTV2_ECON30MIN_CMN_20080412_213501 1611.565 1616.752
+CCTV2_ECON30MIN_CMN_20080412_213501 1616.752 1620.586
+CCTV1_30MINNEWS_CMN_20080407_115901 72.599 80.144
+CCTV1_30MINNEWS_CMN_20080407_115901 80.144 83.857
+CCTV1_30MINNEWS_CMN_20080407_115901 83.857 105.129
+CCTV1_30MINNEWS_CMN_20080407_115901 116.117 121.357
+CCTV1_30MINNEWS_CMN_20080407_115901 150.030 161.497
+CCTV1_30MINNEWS_CMN_20080407_115901 161.497 175.629
+CCTV1_30MINNEWS_CMN_20080407_115901 213.663 230.995
+CCTV1_30MINNEWS_CMN_20080407_115901 230.995 249.179
+CCTV1_30MINNEWS_CMN_20080407_115901 291.761 304.379
+CCTV1_30MINNEWS_CMN_20080407_115901 304.379 319.932
+CCTV1_30MINNEWS_CMN_20080407_115901 319.932 333.454
+CCTV1_30MINNEWS_CMN_20080407_115901 333.454 352.479
+CCTV1_30MINNEWS_CMN_20080407_115901 370.503 377.776
+CCTV1_30MINNEWS_CMN_20080407_115901 416.755 433.159
+CCTV1_30MINNEWS_CMN_20080407_115901 451.599 461.390
+CCTV1_30MINNEWS_CMN_20080407_115901 461.390 474.803
+CCTV1_30MINNEWS_CMN_20080407_115901 474.803 484.535
+CCTV1_30MINNEWS_CMN_20080407_115901 484.535 506.059
+CCTV1_30MINNEWS_CMN_20080407_115901 506.059 526.838
+CCTV1_30MINNEWS_CMN_20080407_115901 526.838 549.979
+CCTV1_30MINNEWS_CMN_20080407_115901 549.979 564.409
+CCTV1_30MINNEWS_CMN_20080407_115901 564.409 575.066
+CCTV1_30MINNEWS_CMN_20080407_115901 575.066 589.108
+CCTV1_30MINNEWS_CMN_20080407_115901 589.108 596.139
+CCTV1_30MINNEWS_CMN_20080407_115901 596.139 629.826
+CCTV1_30MINNEWS_CMN_20080407_115901 629.826 640.245
+CCTV1_30MINNEWS_CMN_20080407_115901 640.245 661.855
+CCTV1_30MINNEWS_CMN_20080407_115901 661.855 679.314
+CCTV1_30MINNEWS_CMN_20080407_115901 682.941 700.337
+CCTV1_30MINNEWS_CMN_20080407_115901 737.969 766.080
+CCTV1_30MINNEWS_CMN_20080407_115901 945.161 963.631
+CCTV1_30MINNEWS_CMN_20080407_115901 963.631 978.990
+CCTV1_30MINNEWS_CMN_20080407_115901 978.990 990.241
+CCTV1_30MINNEWS_CMN_20080407_115901 990.241 1013.970
+CCTV1_30MINNEWS_CMN_20080407_115901 1052.265 1064.960
+CCTV1_30MINNEWS_CMN_20080407_115901 1095.197 1111.780
+CCTV1_30MINNEWS_CMN_20080407_115901 1153.333 1174.020
+CCTV1_30MINNEWS_CMN_20080407_115901 1197.400 1213.209
+CCTV1_30MINNEWS_CMN_20080407_115901 1213.209 1226.592
+CCTV1_30MINNEWS_CMN_20080407_115901 1296.113 1312.308
+CCTV1_30MINNEWS_CMN_20080407_115901 1312.308 1324.524
+CCTV1_30MINNEWS_CMN_20080407_115901 1324.524 1333.815
+CCTV1_30MINNEWS_CMN_20080407_115901 1333.815 1345.312
+CCTV1_30MINNEWS_CMN_20080407_115901 1345.312 1362.742
+CCTV1_30MINNEWS_CMN_20080407_115901 1362.742 1378.300
+CCTV1_30MINNEWS_CMN_20080407_115901 1412.752 1430.177
+CCTV1_30MINNEWS_CMN_20080407_115901 1430.177 1438.558
+CCTV1_30MINNEWS_CMN_20080407_115901 1438.558 1453.235
+CCTV1_30MINNEWS_CMN_20080407_115901 1479.408 1492.582
+CCTV1_30MINNEWS_CMN_20080407_115901 1492.582 1501.444
+CCTV1_30MINNEWS_CMN_20080407_115901 1501.444 1516.022
+CCTV1_30MINNEWS_CMN_20080407_115901 1541.756 1561.663
+CCTV1_30MINNEWS_CMN_20080407_115901 1561.663 1576.635
+CCTV1_30MINNEWS_CMN_20080407_115901 1576.635 1589.839
+CCTV1_30MINNEWS_CMN_20080407_115901 1589.839 1606.458
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 211.536 215.364
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 215.364 218.114
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 218.114 237.536
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 248.239 257.426
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 257.426 269.504
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 273.606 279.903
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 285.113 296.675
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 309.998 318.529
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 318.529 327.607
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 327.607 337.982
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 337.982 350.607
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 350.607 357.904
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 357.904 368.388
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 368.388 373.263
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 373.263 380.278
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 380.278 392.778
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 392.778 404.935
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 404.935 410.279
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 424.810 441.669
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 463.857 470.404
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 492.464 498.496
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 498.496 504.325
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 504.325 510.313
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 510.313 523.688
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 534.907 545.517
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 545.517 556.939
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 566.173 574.610
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 574.610 582.485
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 582.485 589.923
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 589.923 604.111
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 604.111 609.908
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 609.908 626.459
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 647.974 662.880
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 666.207 679.000
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 679.000 685.766
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 685.766 694.974
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 694.974 706.099
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 712.365 718.364
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 718.364 729.161
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 729.161 733.521
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 740.478 752.133
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 752.133 761.336
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 761.336 764.571
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 764.571 772.056
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 772.056 780.962
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 794.321 802.523
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 802.523 806.929
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 806.929 812.507
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 812.507 823.068
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 823.068 835.519
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 835.519 840.581
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 840.581 852.097
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 852.097 858.505
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 858.505 870.925
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 870.925 872.691
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 886.238 895.894
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 895.894 901.003
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 901.003 911.518
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 911.518 921.706
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 921.706 930.159
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 930.159 952.471
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 952.471 961.782
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 961.782 972.300
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 973.527 979.090
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 979.090 991.168
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1000.308 1011.245
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1048.746 1056.356
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1073.090 1080.730
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1080.730 1095.590
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1095.590 1103.982
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1110.373 1116.202
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1116.202 1128.130
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1128.130 1140.724
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1140.724 1149.976
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1149.976 1154.414
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1154.414 1165.240
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1183.615 1191.177
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1197.880 1204.599
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1204.599 1211.990
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1211.990 1224.131
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1224.131 1226.459
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1226.459 1233.224
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1233.224 1240.630
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1245.504 1249.956
+CCTVNEWS_EVENINGNEWS_CMN_20080405_225702 1249.956 1257.472
+VOA_INTNLNEWS_CMN_20080410_210000 241.096 247.022
+VOA_INTNLNEWS_CMN_20080410_210000 247.022 248.349
+VOA_INTNLNEWS_CMN_20080410_210000 261.004 271.226
+VOA_INTNLNEWS_CMN_20080410_210000 271.226 281.664
+VOA_INTNLNEWS_CMN_20080410_210000 289.743 295.118
+VOA_INTNLNEWS_CMN_20080410_210000 295.118 297.196
+VOA_INTNLNEWS_CMN_20080410_210000 297.196 303.774
+VOA_INTNLNEWS_CMN_20080410_210000 303.774 309.774
+VOA_INTNLNEWS_CMN_20080410_210000 325.212 335.493
+VOA_INTNLNEWS_CMN_20080410_210000 335.493 341.461
+VOA_INTNLNEWS_CMN_20080410_210000 341.461 347.883
+VOA_INTNLNEWS_CMN_20080410_210000 347.883 356.164
+VOA_INTNLNEWS_CMN_20080410_210000 356.164 360.430
+VOA_INTNLNEWS_CMN_20080410_210000 360.430 369.056
+VOA_INTNLNEWS_CMN_20080410_210000 369.056 381.087
+VOA_INTNLNEWS_CMN_20080410_210000 381.087 386.040
+VOA_INTNLNEWS_CMN_20080410_210000 386.040 389.509
+VOA_INTNLNEWS_CMN_20080410_210000 389.509 397.385
+VOA_INTNLNEWS_CMN_20080410_210000 397.385 401.682
+VOA_INTNLNEWS_CMN_20080410_210000 401.682 408.708
+VOA_INTNLNEWS_CMN_20080410_210000 408.708 414.473
+VOA_INTNLNEWS_CMN_20080410_210000 414.473 418.441
+VOA_INTNLNEWS_CMN_20080410_210000 425.457 435.818
+VOA_INTNLNEWS_CMN_20080410_210000 435.818 442.693
+VOA_INTNLNEWS_CMN_20080410_210000 442.693 449.365
+VOA_INTNLNEWS_CMN_20080410_210000 449.365 456.240
+VOA_INTNLNEWS_CMN_20080410_210000 456.240 459.788
+VOA_INTNLNEWS_CMN_20080410_210000 459.788 464.226
+VOA_INTNLNEWS_CMN_20080410_210000 470.648 475.663
+VOA_INTNLNEWS_CMN_20080410_210000 475.663 482.178
+VOA_INTNLNEWS_CMN_20080410_210000 482.178 492.412
+VOA_INTNLNEWS_CMN_20080410_210000 492.412 497.100
+VOA_INTNLNEWS_CMN_20080410_210000 497.100 500.990
+VOA_INTNLNEWS_CMN_20080410_210000 500.990 503.850
+VOA_INTNLNEWS_CMN_20080410_210000 503.850 515.757
+VOA_INTNLNEWS_CMN_20080410_210000 515.757 525.945
+VOA_INTNLNEWS_CMN_20080410_210000 525.945 537.039
+VOA_INTNLNEWS_CMN_20080410_210000 537.039 545.070
+VOA_INTNLNEWS_CMN_20080410_210000 545.070 549.976
+VOA_INTNLNEWS_CMN_20080410_210000 557.008 562.032
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 228.180 236.133
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 236.133 244.843
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 255.810 259.701
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 259.701 268.261
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 278.715 292.809
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 312.904 317.155
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 334.781 341.536
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 341.536 351.799
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 351.799 360.435
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 360.435 366.748
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 376.030 389.032
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 389.032 399.781
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 399.781 408.548
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 408.548 414.331
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 414.331 422.518
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 422.518 426.022
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 426.022 432.701
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 432.701 440.420
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 440.420 453.372
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 461.998 470.404
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 470.404 475.420
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 475.420 484.810
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 484.810 490.248
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 490.248 500.982
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 500.982 508.403
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 521.637 527.780
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 527.780 532.997
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 532.997 544.717
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 544.717 551.822
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 551.822 553.995
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 560.145 564.427
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 564.427 575.878
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 575.878 580.002
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 580.002 586.055
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 586.055 592.181
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 592.181 595.181
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 595.181 598.946
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 598.946 613.235
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 621.049 634.460
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 634.460 642.522
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 642.522 649.698
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 649.698 657.807
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 657.807 662.156
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 662.156 668.308
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 668.308 681.335
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 695.461 701.961
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 701.961 712.410
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 712.410 718.802
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 718.802 725.024
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 744.309 750.042
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 750.042 761.379
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 770.348 786.365
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 791.958 799.987
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 799.987 805.999
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 815.813 824.181
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 824.181 839.867
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 839.867 845.243
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 845.243 856.615
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 867.381 870.021
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 870.021 872.920
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 879.896 892.784
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 906.782 910.734
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 910.734 924.841
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 924.841 935.031
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 935.031 957.200
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 969.511 978.637
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 978.637 983.601
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 983.601 993.537
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 993.537 1011.337
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1011.337 1026.450
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1026.450 1030.840
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1030.840 1050.755
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1050.755 1063.656
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1063.656 1072.571
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1072.571 1078.711
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1078.711 1086.983
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1086.983 1100.777
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1100.777 1114.079
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1114.079 1126.798
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1163.086 1172.149
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1186.789 1188.726
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1192.203 1196.211
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1196.211 1207.461
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1207.461 1214.070
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1226.879 1229.215
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1236.640 1242.171
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1242.171 1249.543
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1257.477 1264.526
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1264.526 1268.292
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1283.046 1287.238
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1287.238 1294.534
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1294.534 1305.347
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1305.347 1316.003
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1344.598 1350.644
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1375.571 1379.886
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1379.886 1389.964
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1389.964 1397.933
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1414.520 1422.927
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1429.053 1435.522
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1435.522 1440.163
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1440.163 1451.210
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1451.210 1457.773
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1457.773 1465.993
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1470.852 1475.711
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1483.711 1490.899
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1490.899 1497.868
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1514.887 1522.070
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1526.976 1537.617
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1537.617 1543.257
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1543.257 1549.522
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1569.299 1573.908
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1587.114 1596.333
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1596.333 1601.880
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1601.880 1608.978
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1608.978 1614.837
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1622.775 1625.540
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1634.554 1643.945
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1651.340 1666.658
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1666.658 1676.939
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1682.510 1691.625
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1691.625 1704.187
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1727.775 1740.884
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1740.884 1754.195
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1754.195 1758.586
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1766.540 1788.461
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1788.461 1797.320
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1832.570 1845.677
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1852.192 1861.661
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1861.661 1865.881
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1877.756 1890.644
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1905.985 1917.337
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1917.337 1933.369
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1953.813 1959.610
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1959.610 1962.781
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1962.781 1974.406
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1974.406 1980.155
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 1980.155 1987.740
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2011.483 2013.780
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2013.780 2022.562
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2033.662 2037.700
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2037.700 2046.344
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2057.264 2071.271
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2090.929 2095.844
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2113.417 2120.572
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2120.572 2130.836
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2130.836 2139.166
+VOA_INTNLNEWSFINANCE_CMN_20080414_100000 2139.166 2146.237
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 0.000 6.406
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 6.406 25.687
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 25.687 42.264
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 42.264 50.139
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 50.139 57.030
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 57.030 64.796
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 64.796 69.874
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 69.874 78.733
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 78.733 87.607
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 87.607 97.373
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 97.373 102.279
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 108.826 116.358
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 119.811 132.706
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 132.706 159.347
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 159.347 169.519
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 193.519 198.629
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 198.629 209.509
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 236.962 246.151
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 252.495 259.980
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 259.980 277.386
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 277.386 291.652
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 291.652 295.544
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 295.544 302.231
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 302.231 323.173
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 332.360 337.564
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 337.564 340.952
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 347.515 356.637
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 367.262 373.388
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 402.855 411.900
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 411.900 420.685
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 420.685 428.325
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 428.325 434.171
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 438.031 446.500
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 446.500 455.751
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 455.751 464.548
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 471.355 486.134
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 486.134 497.166
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 497.166 506.574
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 506.574 514.181
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 514.181 520.135
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 520.135 533.384
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 533.384 540.275
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 540.275 547.757
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 547.757 552.976
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 552.976 558.383
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 558.383 567.271
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 567.271 575.239
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 581.411 584.629
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 584.629 591.848
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 596.348 601.958
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 601.958 617.017
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 617.017 633.601
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 640.751 645.814
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 645.814 659.437
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 659.437 663.542
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 663.542 668.322
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 668.322 679.681
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 679.681 690.271
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 690.271 703.386
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 703.386 713.979
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 713.979 721.657
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 721.657 732.046
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 732.046 743.477
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 768.381 774.067
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 774.067 782.489
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 782.489 788.896
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 788.896 797.457
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 805.003 816.628
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 816.628 823.926
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 823.926 832.112
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 832.112 833.815
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 839.424 852.986
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 867.282 876.986
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 876.986 892.158
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 905.111 913.809
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 913.809 916.558
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 916.558 919.496
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 919.496 927.948
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 927.948 937.875
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 937.875 955.487
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 955.487 971.142
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 979.812 988.934
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 988.934 994.982
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 994.982 1003.881
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1003.881 1009.440
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1009.440 1023.520
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1023.520 1025.488
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1025.488 1034.377
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1041.127 1045.674
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1045.674 1054.331
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1054.331 1063.112
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1063.112 1066.378
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1066.378 1076.675
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1081.284 1085.769
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1095.286 1100.661
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1100.661 1104.720
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1104.720 1113.782
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1113.782 1123.170
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1123.170 1128.733
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1128.733 1146.732
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1146.732 1151.919
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1151.919 1155.950
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1166.418 1171.902
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1171.902 1181.512
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1181.512 1187.964
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1187.964 1190.324
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1190.324 1191.746
+CCTV7_MILITARYNEWS1_CMN_20080327_100812 1191.746 1192.434
+CCTV2_ECON30MIN_CMN_20080425_213502 25.719 33.376
+CCTV2_ECON30MIN_CMN_20080425_213502 36.746 45.495
+CCTV2_ECON30MIN_CMN_20080425_213502 50.808 54.652
+CCTV2_ECON30MIN_CMN_20080425_213502 216.596 218.956
+CCTV2_ECON30MIN_CMN_20080425_213502 218.956 239.191
+CCTV2_ECON30MIN_CMN_20080425_213502 239.191 250.566
+CCTV2_ECON30MIN_CMN_20080425_213502 272.537 286.157
+CCTV2_ECON30MIN_CMN_20080425_213502 286.157 292.595
+CCTV2_ECON30MIN_CMN_20080425_213502 292.595 299.455
+CCTV2_ECON30MIN_CMN_20080425_213502 299.455 313.924
+CCTV2_ECON30MIN_CMN_20080425_213502 313.924 319.379
+CCTV2_ECON30MIN_CMN_20080425_213502 319.379 321.925
+CCTV2_ECON30MIN_CMN_20080425_213502 319.379 321.925
+CCTV2_ECON30MIN_CMN_20080425_213502 321.925 327.956
+CCTV2_ECON30MIN_CMN_20080425_213502 327.956 335.146
+CCTV2_ECON30MIN_CMN_20080425_213502 335.146 339.349
+CCTV2_ECON30MIN_CMN_20080425_213502 339.349 357.333
+CCTV2_ECON30MIN_CMN_20080425_213502 357.333 364.505
+CCTV2_ECON30MIN_CMN_20080425_213502 364.505 367.790
+CCTV2_ECON30MIN_CMN_20080425_213502 367.790 372.478
+CCTV2_ECON30MIN_CMN_20080425_213502 372.478 383.152
+CCTV2_ECON30MIN_CMN_20080425_213502 383.152 392.501
+CCTV2_ECON30MIN_CMN_20080425_213502 392.501 394.235
+CCTV2_ECON30MIN_CMN_20080425_213502 394.235 401.264
+CCTV2_ECON30MIN_CMN_20080425_213502 401.264 415.155
+CCTV2_ECON30MIN_CMN_20080425_213502 429.875 440.328
+CCTV2_ECON30MIN_CMN_20080425_213502 440.328 450.703
+CCTV2_ECON30MIN_CMN_20080425_213502 450.703 463.235
+CCTV2_ECON30MIN_CMN_20080425_213502 469.252 480.892
+CCTV2_ECON30MIN_CMN_20080425_213502 480.892 489.777
+CCTV2_ECON30MIN_CMN_20080425_213502 489.777 498.264
+CCTV2_ECON30MIN_CMN_20080425_213502 501.889 506.483
+CCTV2_ECON30MIN_CMN_20080425_213502 506.483 517.202
+CCTV2_ECON30MIN_CMN_20080425_213502 517.202 528.155
+CCTV2_ECON30MIN_CMN_20080425_213502 534.310 544.232
+CCTV2_ECON30MIN_CMN_20080425_213502 544.232 551.194
+CCTV2_ECON30MIN_CMN_20080425_213502 551.194 555.475
+CCTV2_ECON30MIN_CMN_20080425_213502 555.475 569.742
+CCTV2_ECON30MIN_CMN_20080425_213502 569.742 577.632
+CCTV2_ECON30MIN_CMN_20080425_213502 577.632 587.850
+CCTV2_ECON30MIN_CMN_20080425_213502 587.850 599.917
+CCTV2_ECON30MIN_CMN_20080425_213502 613.042 618.542
+CCTV2_ECON30MIN_CMN_20080425_213502 618.542 631.917
+CCTV2_ECON30MIN_CMN_20080425_213502 631.917 644.870
+CCTV2_ECON30MIN_CMN_20080425_213502 644.870 676.477
+CCTV2_ECON30MIN_CMN_20080425_213502 676.477 689.633
+CCTV2_ECON30MIN_CMN_20080425_213502 689.633 699.791
+CCTV2_ECON30MIN_CMN_20080425_213502 851.668 852.996
+CCTV2_ECON30MIN_CMN_20080425_213502 852.996 876.511
+CCTV2_ECON30MIN_CMN_20080425_213502 876.511 881.604
+CCTV2_ECON30MIN_CMN_20080425_213502 881.604 903.275
+CCTV2_ECON30MIN_CMN_20080425_213502 903.275 904.447
+CCTV2_ECON30MIN_CMN_20080425_213502 903.275 904.447
+CCTV2_ECON30MIN_CMN_20080425_213502 904.447 906.790
+CCTV2_ECON30MIN_CMN_20080425_213502 906.790 911.665
+CCTV2_ECON30MIN_CMN_20080425_213502 914.868 923.118
+CCTV2_ECON30MIN_CMN_20080425_213502 923.118 929.744
+CCTV2_ECON30MIN_CMN_20080425_213502 929.744 942.631
+CCTV2_ECON30MIN_CMN_20080425_213502 942.631 946.850
+CCTV2_ECON30MIN_CMN_20080425_213502 946.850 949.709
+CCTV2_ECON30MIN_CMN_20080425_213502 949.709 950.224
+CCTV2_ECON30MIN_CMN_20080425_213502 961.945 975.632
+CCTV2_ECON30MIN_CMN_20080425_213502 975.632 980.478
+CCTV2_ECON30MIN_CMN_20080425_213502 980.478 982.056
+CCTV2_ECON30MIN_CMN_20080425_213502 982.056 989.393
+CCTV2_ECON30MIN_CMN_20080425_213502 989.393 1002.122
+CCTV2_ECON30MIN_CMN_20080425_213502 1002.122 1003.029
+CCTV2_ECON30MIN_CMN_20080425_213502 1002.122 1003.029
+CCTV2_ECON30MIN_CMN_20080425_213502 1003.029 1008.045
+CCTV2_ECON30MIN_CMN_20080425_213502 1008.045 1019.029
+CCTV2_ECON30MIN_CMN_20080425_213502 1026.061 1033.936
+CCTV2_ECON30MIN_CMN_20080425_213502 1033.936 1046.405
+CCTV2_ECON30MIN_CMN_20080425_213502 1046.405 1061.850
+CCTV2_ECON30MIN_CMN_20080425_213502 1061.850 1070.678
+CCTV2_ECON30MIN_CMN_20080425_213502 1070.678 1077.507
+CCTV2_ECON30MIN_CMN_20080425_213502 1077.507 1084.179
+CCTV2_ECON30MIN_CMN_20080425_213502 1084.179 1093.492
+CCTV2_ECON30MIN_CMN_20080425_213502 1111.087 1126.793
+CCTV2_ECON30MIN_CMN_20080425_213502 1135.543 1142.245
+CCTV2_ECON30MIN_CMN_20080425_213502 1142.245 1147.538
+CCTV2_ECON30MIN_CMN_20080425_213502 1147.538 1149.476
+CCTV2_ECON30MIN_CMN_20080425_213502 1160.007 1163.570
+CCTV2_ECON30MIN_CMN_20080425_213502 1163.570 1171.992
+CCTV2_ECON30MIN_CMN_20080425_213502 1183.305 1189.123
+CCTV2_ECON30MIN_CMN_20080425_213502 1189.123 1193.841
+CCTV2_ECON30MIN_CMN_20080425_213502 1320.679 1321.835
+CCTV2_ECON30MIN_CMN_20080425_213502 1321.835 1339.758
+CCTV2_ECON30MIN_CMN_20080425_213502 1339.758 1345.945
+CCTV2_ECON30MIN_CMN_20080425_213502 1345.945 1359.070
+CCTV2_ECON30MIN_CMN_20080425_213502 1359.070 1366.007
+CCTV2_ECON30MIN_CMN_20080425_213502 1366.007 1376.663
+CCTV2_ECON30MIN_CMN_20080425_213502 1376.663 1384.632
+CCTV2_ECON30MIN_CMN_20080425_213502 1384.632 1397.725
+CCTV2_ECON30MIN_CMN_20080425_213502 1397.725 1400.023
+CCTV2_ECON30MIN_CMN_20080425_213502 1400.023 1403.734
+CCTV2_ECON30MIN_CMN_20080425_213502 1403.734 1405.265
+CCTV2_ECON30MIN_CMN_20080425_213502 1405.265 1409.281
+CCTV2_ECON30MIN_CMN_20080425_213502 1409.281 1417.828
+CCTV2_ECON30MIN_CMN_20080425_213502 1417.828 1431.453
+CCTV2_ECON30MIN_CMN_20080425_213502 1431.453 1445.022
+CCTV2_ECON30MIN_CMN_20080425_213502 1445.022 1451.636
+CCTV2_ECON30MIN_CMN_20080425_213502 1451.636 1463.714
+CCTV2_ECON30MIN_CMN_20080425_213502 1463.714 1479.306
+CCTV2_ECON30MIN_CMN_20080425_213502 1486.303 1505.256
+CCTV2_ECON30MIN_CMN_20080425_213502 1505.256 1517.693
+CCTV2_ECON30MIN_CMN_20080425_213502 1517.693 1519.912
+CCTV2_ECON30MIN_CMN_20080425_213502 1526.475 1529.428
+CCTV2_ECON30MIN_CMN_20080425_213502 1529.428 1530.366
+CCTV2_ECON30MIN_CMN_20080425_213502 1530.366 1533.631
+CCTV2_ECON30MIN_CMN_20080425_213502 1533.631 1537.912
+CCTV2_ECON30MIN_CMN_20080425_213502 1551.522 1567.381
+CCTV2_ECON30MIN_CMN_20080425_213502 1572.397 1587.053
+CCTV2_ECON30MIN_CMN_20080425_213502 1587.053 1594.929
+CCTV2_ECON30MIN_CMN_20080425_213502 1594.929 1604.288
+CCTV2_ECON30MIN_CMN_20080425_213502 1604.288 1608.975
+CCTV2_ECON30MIN_CMN_20080425_213502 1608.975 1614.209
+CCTV2_ECON30MIN_CMN_20080425_213502 1614.209 1620.429
+VOA_INTNLNEWS_CMN_20080412_210000 241.515 247.437
+VOA_INTNLNEWS_CMN_20080412_210000 247.437 251.249
+VOA_INTNLNEWS_CMN_20080412_210000 251.249 255.157
+VOA_INTNLNEWS_CMN_20080412_210000 255.157 256.985
+VOA_INTNLNEWS_CMN_20080412_210000 256.985 266.042
+VOA_INTNLNEWS_CMN_20080412_210000 266.042 272.027
+VOA_INTNLNEWS_CMN_20080412_210000 285.606 294.059
+VOA_INTNLNEWS_CMN_20080412_210000 327.823 339.136
+VOA_INTNLNEWS_CMN_20080412_210000 339.136 347.120
+VOA_INTNLNEWS_CMN_20080412_210000 347.120 353.417
+VOA_INTNLNEWS_CMN_20080412_210000 371.979 379.588
+VOA_INTNLNEWS_CMN_20080412_210000 379.588 382.417
+VOA_INTNLNEWS_CMN_20080412_210000 382.417 390.261
+VOA_INTNLNEWS_CMN_20080412_210000 390.261 400.886
+VOA_INTNLNEWS_CMN_20080412_210000 400.886 409.511
+VOA_INTNLNEWS_CMN_20080412_210000 409.511 418.933
+VOA_INTNLNEWS_CMN_20080412_210000 418.933 430.136
+VOA_INTNLNEWS_CMN_20080412_210000 430.136 436.839
+VOA_INTNLNEWS_CMN_20080412_210000 445.991 456.382
+VOA_INTNLNEWS_CMN_20080412_210000 456.382 465.273
+VOA_INTNLNEWS_CMN_20080412_210000 469.772 473.928
+VOA_INTNLNEWS_CMN_20080412_210000 479.631 485.787
+VOA_INTNLNEWS_CMN_20080412_210000 485.787 490.584
+VOA_INTNLNEWS_CMN_20080412_210000 490.584 498.194
+VOA_INTNLNEWS_CMN_20080412_210000 498.194 502.054
+VOA_INTNLNEWS_CMN_20080412_210000 502.054 510.008
+VOA_INTNLNEWS_CMN_20080412_210000 510.008 516.523
+VOA_INTNLNEWS_CMN_20080412_210000 516.523 522.148
+VOA_INTNLNEWS_CMN_20080412_210000 522.148 530.038
+VOA_INTNLNEWS_CMN_20080412_210000 530.038 535.836
+VOA_INTNLNEWS_CMN_20080412_210000 535.836 541.070
+VOA_INTNLNEWS_CMN_20080412_210000 541.070 547.726
+VOA_INTNLNEWS_CMN_20080412_210000 547.726 552.539
+VOA_INTNLNEWS_CMN_20080402_210000 255.095 267.832
+VOA_INTNLNEWS_CMN_20080402_210000 267.832 277.345
+VOA_INTNLNEWS_CMN_20080402_210000 277.345 284.313
+VOA_INTNLNEWS_CMN_20080402_210000 284.313 289.595
+VOA_INTNLNEWS_CMN_20080402_210000 289.595 291.392
+VOA_INTNLNEWS_CMN_20080402_210000 291.392 295.550
+VOA_INTNLNEWS_CMN_20080402_210000 303.005 309.490
+VOA_INTNLNEWS_CMN_20080402_210000 314.473 326.161
+VOA_INTNLNEWS_CMN_20080402_210000 326.161 332.957
+VOA_INTNLNEWS_CMN_20080402_210000 332.957 338.723
+VOA_INTNLNEWS_CMN_20080402_210000 338.723 346.136
+VOA_INTNLNEWS_CMN_20080402_210000 362.290 370.555
+VOA_INTNLNEWS_CMN_20080402_210000 370.555 379.896
+VOA_INTNLNEWS_CMN_20080402_210000 386.036 392.129
+VOA_INTNLNEWS_CMN_20080402_210000 392.129 400.301
+VOA_INTNLNEWS_CMN_20080402_210000 400.301 414.724
+VOA_INTNLNEWS_CMN_20080402_210000 414.724 430.958
+VOA_INTNLNEWS_CMN_20080402_210000 430.958 435.676
+VOA_INTNLNEWS_CMN_20080402_210000 435.676 451.927
+VOA_INTNLNEWS_CMN_20080402_210000 451.927 461.149
+VOA_INTNLNEWS_CMN_20080402_210000 461.149 467.856
+VOA_INTNLNEWS_CMN_20080402_210000 467.856 474.842
+VOA_INTNLNEWS_CMN_20080402_210000 474.842 479.671
+VOA_INTNLNEWS_CMN_20080402_210000 479.671 490.540
+VOA_INTNLNEWS_CMN_20080402_210000 490.540 499.612
+VOA_INTNLNEWS_CMN_20080402_210000 499.612 509.706
+VOA_INTNLNEWS_CMN_20080402_210000 509.706 512.798
+VOA_INTNLNEWS_CMN_20080402_210000 512.798 523.002
+VOA_INTNLNEWS_CMN_20080402_210000 523.002 530.768
+VOA_INTNLNEWS_CMN_20080402_210000 530.768 534.705
+VOA_INTNLNEWS_CMN_20080402_210000 541.379 545.271
+VOA_INTNLNEWS_CMN_20080402_210000 545.271 553.535
+VOA_INTNLNEWS_CMN_20080402_210000 553.535 561.894
+VOA_INTNLNEWS_CMN_20080402_210000 561.894 564.723
+VOA_INTNLNEWS_CMN_20080402_210000 589.946 597.929
+VOA_INTNLNEWS_CMN_20080402_210000 597.929 607.307
+VOA_INTNLNEWS_CMN_20080402_210000 611.169 617.137
+VOA_INTNLNEWS_CMN_20080402_210000 617.137 626.983
+VOA_INTNLNEWS_CMN_20080402_210000 626.983 635.684
+VOA_INTNLNEWS_CMN_20080402_210000 635.684 641.301
+VOA_INTNLNEWS_CMN_20080402_210000 648.878 655.888
+VOA_INTNLNEWS_CMN_20080402_210000 655.888 659.062
+VOA_INTNLNEWS_CMN_20080402_210000 682.139 691.071
+VOA_INTNLNEWS_CMN_20080402_210000 691.071 697.602
+VOA_INTNLNEWS_CMN_20080402_210000 697.602 706.116
+VOA_INTNLNEWS_CMN_20080402_210000 706.116 715.568
+VOA_INTNLNEWS_CMN_20080402_210000 715.568 719.730
+VOA_INTNLNEWS_CMN_20080402_210000 719.730 721.761
+VOA_INTNLNEWS_CMN_20080402_210000 721.761 734.686
+VOA_INTNLNEWS_CMN_20080402_210000 734.686 738.562
+VOA_INTNLNEWS_CMN_20080402_210000 745.020 758.410
+VOA_INTNLNEWS_CMN_20080402_210000 758.410 773.659
+VOA_INTNLNEWS_CMN_20080402_210000 773.659 778.739
+VOA_INTNLNEWS_CMN_20080402_210000 778.739 786.284
+VOA_INTNLNEWS_CMN_20080402_210000 802.384 807.363
+VOA_INTNLNEWS_CMN_20080402_210000 807.363 816.378
+VOA_INTNLNEWS_CMN_20080402_210000 821.767 826.288
+VOA_INTNLNEWS_CMN_20080402_210000 841.318 852.754
+VOA_INTNLNEWS_CMN_20080402_210000 852.754 856.503
+VOA_INTNLNEWS_CMN_20080402_210000 856.503 858.941
+VOA_INTNLNEWS_CMN_20080402_210000 868.034 869.300
+VOA_INTNLNEWS_CMN_20080402_210000 869.300 881.254
+VOA_INTNLNEWS_CMN_20080402_210000 881.254 884.568
+VOA_INTNLNEWS_CMN_20080402_210000 896.069 913.723
+VOA_INTNLNEWS_CMN_20080402_210000 913.723 925.820
+VOA_INTNLNEWS_CMN_20080402_210000 941.707 947.865
+VOA_INTNLNEWS_CMN_20080402_210000 952.085 960.000
+CCTV1_30MINNEWS_CMN_20080329_115901 73.737 77.049
+CCTV1_30MINNEWS_CMN_20080329_115901 77.049 82.711
+CCTV1_30MINNEWS_CMN_20080329_115901 82.711 87.201
+CCTV1_30MINNEWS_CMN_20080329_115901 87.201 95.233
+CCTV1_30MINNEWS_CMN_20080329_115901 98.890 99.733
+CCTV1_30MINNEWS_CMN_20080329_115901 99.733 102.687
+CCTV1_30MINNEWS_CMN_20080329_115901 102.687 110.877
+CCTV1_30MINNEWS_CMN_20080329_115901 110.877 114.392
+CCTV1_30MINNEWS_CMN_20080329_115901 119.220 126.939
+CCTV1_30MINNEWS_CMN_20080329_115901 126.939 133.564
+CCTV1_30MINNEWS_CMN_20080329_115901 133.564 142.751
+CCTV1_30MINNEWS_CMN_20080329_115901 153.986 158.408
+CCTV1_30MINNEWS_CMN_20080329_115901 158.408 172.456
+CCTV1_30MINNEWS_CMN_20080329_115901 182.995 194.276
+CCTV1_30MINNEWS_CMN_20080329_115901 194.276 202.276
+CCTV1_30MINNEWS_CMN_20080329_115901 202.276 215.833
+CCTV1_30MINNEWS_CMN_20080329_115901 228.865 237.145
+CCTV1_30MINNEWS_CMN_20080329_115901 237.145 248.863
+CCTV1_30MINNEWS_CMN_20080329_115901 248.863 259.941
+CCTV1_30MINNEWS_CMN_20080329_115901 259.941 266.238
+CCTV1_30MINNEWS_CMN_20080329_115901 266.238 274.238
+CCTV1_30MINNEWS_CMN_20080329_115901 274.238 287.286
+CCTV1_30MINNEWS_CMN_20080329_115901 287.286 295.099
+CCTV1_30MINNEWS_CMN_20080329_115901 295.099 306.801
+CCTV1_30MINNEWS_CMN_20080329_115901 306.801 326.598
+CCTV1_30MINNEWS_CMN_20080329_115901 341.614 347.379
+CCTV1_30MINNEWS_CMN_20080329_115901 347.379 357.614
+CCTV1_30MINNEWS_CMN_20080329_115901 357.614 363.676
+CCTV1_30MINNEWS_CMN_20080329_115901 363.676 373.630
+CCTV1_30MINNEWS_CMN_20080329_115901 373.630 384.913
+CCTV1_30MINNEWS_CMN_20080329_115901 384.913 391.115
+CCTV1_30MINNEWS_CMN_20080329_115901 399.272 404.884
+CCTV1_30MINNEWS_CMN_20080329_115901 404.884 411.860
+CCTV1_30MINNEWS_CMN_20080329_115901 480.963 493.542
+CCTV1_30MINNEWS_CMN_20080329_115901 493.542 502.400
+CCTV1_30MINNEWS_CMN_20080329_115901 505.334 507.256
+CCTV1_30MINNEWS_CMN_20080329_115901 507.256 511.100
+CCTV1_30MINNEWS_CMN_20080329_115901 511.100 517.459
+CCTV1_30MINNEWS_CMN_20080329_115901 517.459 526.599
+CCTV1_30MINNEWS_CMN_20080329_115901 526.599 537.850
+CCTV1_30MINNEWS_CMN_20080329_115901 537.850 546.377
+CCTV1_30MINNEWS_CMN_20080329_115901 590.276 605.433
+CCTV1_30MINNEWS_CMN_20080329_115901 605.433 616.324
+CCTV1_30MINNEWS_CMN_20080329_115901 616.324 635.762
+CCTV1_30MINNEWS_CMN_20080329_115901 635.762 640.668
+CCTV1_30MINNEWS_CMN_20080329_115901 640.668 646.683
+CCTV1_30MINNEWS_CMN_20080329_115901 646.683 654.121
+CCTV1_30MINNEWS_CMN_20080329_115901 668.982 681.060
+CCTV1_30MINNEWS_CMN_20080329_115901 681.060 690.889
+CCTV1_30MINNEWS_CMN_20080329_115901 690.889 705.174
+CCTV1_30MINNEWS_CMN_20080329_115901 705.174 718.168
+CCTV1_30MINNEWS_CMN_20080329_115901 718.168 735.215
+CCTV1_30MINNEWS_CMN_20080329_115901 735.215 749.948
+CCTV1_30MINNEWS_CMN_20080329_115901 749.948 755.824
+CCTV1_30MINNEWS_CMN_20080329_115901 755.824 763.433
+CCTV1_30MINNEWS_CMN_20080329_115901 763.433 783.448
+CCTV1_30MINNEWS_CMN_20080329_115901 789.386 804.901
+CCTV1_30MINNEWS_CMN_20080329_115901 804.901 815.636
+CCTV1_30MINNEWS_CMN_20080329_115901 822.120 826.994
+CCTV1_30MINNEWS_CMN_20080329_115901 832.682 842.168
+CCTV1_30MINNEWS_CMN_20080329_115901 842.168 855.058
+CCTV1_30MINNEWS_CMN_20080329_115901 855.058 859.479
+CCTV1_30MINNEWS_CMN_20080329_115901 862.713 870.541
+CCTV1_30MINNEWS_CMN_20080329_115901 870.541 874.744
+CCTV1_30MINNEWS_CMN_20080329_115901 874.744 878.042
+CCTV1_30MINNEWS_CMN_20080329_115901 885.433 893.088
+CCTV1_30MINNEWS_CMN_20080329_115901 893.088 899.478
+CCTV1_30MINNEWS_CMN_20080329_115901 899.478 909.024
+CCTV1_30MINNEWS_CMN_20080329_115901 909.024 912.305
+CCTV1_30MINNEWS_CMN_20080329_115901 912.305 921.181
+CCTV1_30MINNEWS_CMN_20080329_115901 932.742 945.490
+CCTV1_30MINNEWS_CMN_20080329_115901 945.490 955.677
+CCTV1_30MINNEWS_CMN_20080329_115901 955.677 961.792
+CCTV1_30MINNEWS_CMN_20080329_115901 961.792 968.605
+CCTV1_30MINNEWS_CMN_20080329_115901 1030.620 1042.119
+CCTV1_30MINNEWS_CMN_20080329_115901 1059.120 1073.629
+CCTV1_30MINNEWS_CMN_20080329_115901 1095.784 1108.955
+CCTV1_30MINNEWS_CMN_20080329_115901 1108.955 1117.456
+CCTV1_30MINNEWS_CMN_20080329_115901 1245.444 1246.740
+CCTV1_30MINNEWS_CMN_20080329_115901 1246.740 1255.490
+CCTV1_30MINNEWS_CMN_20080329_115901 1255.490 1262.537
+CCTV1_30MINNEWS_CMN_20080329_115901 1266.865 1276.943
+CCTV1_30MINNEWS_CMN_20080329_115901 1276.943 1297.152
+CCTV1_30MINNEWS_CMN_20080329_115901 1304.601 1311.062
+CCTV1_30MINNEWS_CMN_20080329_115901 1322.392 1332.458
+CCTV1_30MINNEWS_CMN_20080329_115901 1350.128 1362.927
+CCTV1_30MINNEWS_CMN_20080329_115901 1362.927 1373.530
+CCTV1_30MINNEWS_CMN_20080329_115901 1373.530 1380.390
+CCTV1_30MINNEWS_CMN_20080329_115901 1380.390 1388.563
+CCTV1_30MINNEWS_CMN_20080329_115901 1388.563 1397.892
+CCTV1_30MINNEWS_CMN_20080329_115901 1404.298 1409.143
+CCTV1_30MINNEWS_CMN_20080329_115901 1409.143 1415.690
+CCTV1_30MINNEWS_CMN_20080329_115901 1415.690 1421.940
+CCTV1_30MINNEWS_CMN_20080329_115901 1421.940 1426.878
+CCTV1_30MINNEWS_CMN_20080329_115901 1426.878 1432.096
+CCTV1_30MINNEWS_CMN_20080329_115901 1432.096 1439.315
+CCTV1_30MINNEWS_CMN_20080329_115901 1439.315 1445.503
+CCTV1_30MINNEWS_CMN_20080329_115901 1445.503 1452.112
+CCTV1_30MINNEWS_CMN_20080329_115901 1452.112 1458.815
+CCTV1_30MINNEWS_CMN_20080329_115901 1458.815 1463.893
+CCTV1_30MINNEWS_CMN_20080329_115901 1477.876 1490.923
+CCTV1_30MINNEWS_CMN_20080329_115901 1490.923 1497.470
+CCTV1_30MINNEWS_CMN_20080329_115901 1497.470 1509.517
+CCTV1_30MINNEWS_CMN_20080329_115901 1509.517 1519.860
+CCTV1_30MINNEWS_CMN_20080329_115901 1519.860 1524.232
+CCTV1_30MINNEWS_CMN_20080329_115901 1524.232 1531.575
+CCTV1_30MINNEWS_CMN_20080329_115901 1531.575 1536.919
+CCTV1_30MINNEWS_CMN_20080329_115901 1536.919 1550.184
+CCTV1_30MINNEWS_CMN_20080329_115901 1550.184 1558.341
+CCTV1_30MINNEWS_CMN_20080329_115901 1558.341 1565.857
+CCTV1_30MINNEWS_CMN_20080329_115901 1565.857 1572.576
+CCTV1_30MINNEWS_CMN_20080329_115901 1572.576 1580.029
+CCTV1_30MINNEWS_CMN_20080329_115901 1580.029 1586.342
+CCTV1_30MINNEWS_CMN_20080329_115901 1586.342 1593.825
+CCTV1_30MINNEWS_CMN_20080329_115901 1593.825 1601.136
+CCTV1_30MINNEWS_CMN_20080329_115901 1601.136 1606.121
+CCTV1_30MINNEWS_CMN_20080329_115901 1606.121 1611.402
+CCTV1_30MINNEWS_CMN_20080329_115901 1616.026 1622.962
+CCTV1_30MINNEWS_CMN_20080329_115901 1844.920 1846.827
+CCTV1_30MINNEWS_CMN_20080329_115901 1848.234 1849.093
+CCTV2_NEWSLIST_CMN_20080415_114902 118.872 124.153
+CCTV2_NEWSLIST_CMN_20080415_114902 124.153 132.330
+CCTV2_NEWSLIST_CMN_20080415_114902 151.335 153.291
+CCTV2_NEWSLIST_CMN_20080415_114902 160.886 162.996
+CCTV2_NEWSLIST_CMN_20080415_114902 162.996 166.512
+CCTV2_NEWSLIST_CMN_20080415_114902 169.262 172.317
+CCTV2_NEWSLIST_CMN_20080415_114902 172.317 174.661
+CCTV2_NEWSLIST_CMN_20080415_114902 174.661 177.129
+CCTV2_NEWSLIST_CMN_20080415_114902 177.129 192.098
+CCTV2_NEWSLIST_CMN_20080415_114902 192.098 202.081
+CCTV2_NEWSLIST_CMN_20080415_114902 202.081 212.719
+CCTV2_NEWSLIST_CMN_20080415_114902 212.719 215.719
+CCTV2_NEWSLIST_CMN_20080415_114902 215.719 221.026
+CCTV2_NEWSLIST_CMN_20080415_114902 221.026 227.468
+CCTV2_NEWSLIST_CMN_20080415_114902 227.468 232.186
+CCTV2_NEWSLIST_CMN_20080415_114902 232.186 237.685
+CCTV2_NEWSLIST_CMN_20080415_114902 237.685 248.085
+CCTV2_NEWSLIST_CMN_20080415_114902 248.085 253.039
+CCTV2_NEWSLIST_CMN_20080415_114902 253.039 256.648
+CCTV2_NEWSLIST_CMN_20080415_114902 256.648 262.523
+CCTV2_NEWSLIST_CMN_20080415_114902 262.523 270.695
+CCTV2_NEWSLIST_CMN_20080415_114902 270.695 279.708
+CCTV2_NEWSLIST_CMN_20080415_114902 279.708 284.803
+CCTV2_NEWSLIST_CMN_20080415_114902 284.803 289.584
+CCTV2_NEWSLIST_CMN_20080415_114902 289.584 303.614
+CCTV2_NEWSLIST_CMN_20080415_114902 303.614 308.741
+CCTV2_NEWSLIST_CMN_20080415_114902 308.741 315.460
+CCTV2_NEWSLIST_CMN_20080415_114902 315.460 331.672
+CCTV2_NEWSLIST_CMN_20080415_114902 331.672 348.261
+CCTV2_NEWSLIST_CMN_20080415_114902 348.261 354.870
+CCTV2_NEWSLIST_CMN_20080415_114902 354.870 370.665
+CCTV2_NEWSLIST_CMN_20080415_114902 370.665 382.167
+CCTV2_NEWSLIST_CMN_20080415_114902 382.167 398.167
+CCTV2_NEWSLIST_CMN_20080415_114902 409.360 415.515
+CCTV2_NEWSLIST_CMN_20080415_114902 415.515 418.773
+CCTV2_NEWSLIST_CMN_20080415_114902 424.625 429.204
+CCTV2_NEWSLIST_CMN_20080415_114902 429.204 433.545
+CCTV2_NEWSLIST_CMN_20080415_114902 433.545 440.273
+CCTV2_NEWSLIST_CMN_20080415_114902 440.273 453.168
+CCTV2_NEWSLIST_CMN_20080415_114902 459.961 469.841
+CCTV2_NEWSLIST_CMN_20080415_114902 469.841 471.216
+CCTV2_NEWSLIST_CMN_20080415_114902 471.216 477.701
+CCTV2_NEWSLIST_CMN_20080415_114902 477.701 485.001
+CCTV2_NEWSLIST_CMN_20080415_114902 485.001 491.797
+CCTV2_NEWSLIST_CMN_20080415_114902 491.797 503.670
+CCTV2_NEWSLIST_CMN_20080415_114902 503.670 508.640
+CCTV2_NEWSLIST_CMN_20080415_114902 516.808 529.315
+CCTV2_NEWSLIST_CMN_20080415_114902 529.315 537.036
+CCTV2_NEWSLIST_CMN_20080415_114902 537.036 548.101
+CCTV2_NEWSLIST_CMN_20080415_114902 548.101 552.509
+CCTV2_NEWSLIST_CMN_20080415_114902 552.509 560.945
+CCTV2_NEWSLIST_CMN_20080415_114902 593.353 601.347
+CCTV2_NEWSLIST_CMN_20080415_114902 601.347 604.683
+CCTV2_NEWSLIST_CMN_20080415_114902 604.683 607.324
+CCTV2_NEWSLIST_CMN_20080415_114902 607.324 618.086
+CCTV2_NEWSLIST_CMN_20080415_114902 618.086 622.098
+CCTV2_NEWSLIST_CMN_20080415_114902 622.098 627.645
+CCTV2_NEWSLIST_CMN_20080415_114902 627.645 632.925
+CCTV2_NEWSLIST_CMN_20080415_114902 632.925 638.422
+CCTV2_NEWSLIST_CMN_20080415_114902 638.422 647.797
+CCTV2_NEWSLIST_CMN_20080415_114902 647.797 659.030
+CCTV2_NEWSLIST_CMN_20080415_114902 659.030 664.373
+CCTV2_NEWSLIST_CMN_20080415_114902 664.373 668.529
+CCTV2_NEWSLIST_CMN_20080415_114902 668.529 675.404
+CCTV2_NEWSLIST_CMN_20080415_114902 675.404 679.185
+CCTV2_NEWSLIST_CMN_20080415_114902 679.185 685.107
+CCTV2_NEWSLIST_CMN_20080415_114902 685.107 691.372
+CCTV2_NEWSLIST_CMN_20080415_114902 691.372 695.168
+CCTV2_NEWSLIST_CMN_20080415_114902 695.168 705.028
+CCTV2_NEWSLIST_CMN_20080415_114902 705.028 713.882
+CCTV2_NEWSLIST_CMN_20080415_114902 736.008 749.039
+CCTV2_NEWSLIST_CMN_20080415_114902 771.900 778.431
+CCTV2_NEWSLIST_CMN_20080415_114902 778.431 784.603
+CCTV2_NEWSLIST_CMN_20080415_114902 784.603 794.727
+CCTV2_NEWSLIST_CMN_20080415_114902 794.727 801.443
+CCTV2_NEWSLIST_CMN_20080415_114902 801.443 808.679
+CCTV2_NEWSLIST_CMN_20080415_114902 808.679 814.381
+CCTV2_NEWSLIST_CMN_20080415_114902 848.253 860.213
+CCTV2_NEWSLIST_CMN_20080415_114902 860.213 870.113
+CCTV2_NEWSLIST_CMN_20080415_114902 870.113 873.941
+CCTV2_NEWSLIST_CMN_20080415_114902 873.941 878.660
+CCTV2_NEWSLIST_CMN_20080415_114902 878.660 886.067
+CCTV2_NEWSLIST_CMN_20080415_114902 886.067 891.098
+CCTV2_NEWSLIST_CMN_20080415_114902 899.375 906.766
+CCTV2_NEWSLIST_CMN_20080415_114902 927.686 931.091
+CCTV2_NEWSLIST_CMN_20080415_114902 931.091 944.248
+CCTV2_NEWSLIST_CMN_20080415_114902 944.248 958.248
+CCTV2_NEWSLIST_CMN_20080415_114902 958.248 981.526
+CCTV2_NEWSLIST_CMN_20080415_114902 981.526 990.776
+CCTV2_NEWSLIST_CMN_20080415_114902 990.776 1001.721
+CCTV2_NEWSLIST_CMN_20080415_114902 1010.787 1018.240
+CCTV2_NEWSLIST_CMN_20080415_114902 1018.240 1025.350
+CCTV2_NEWSLIST_CMN_20080415_114902 1025.350 1034.053
+CCTV2_NEWSLIST_CMN_20080415_114902 1058.628 1067.316
+CCTV2_NEWSLIST_CMN_20080415_114902 1067.316 1078.332
+CCTV2_NEWSLIST_CMN_20080415_114902 1078.332 1088.216
+CCTV2_NEWSLIST_CMN_20080415_114902 1088.216 1097.465
+CCTV2_NEWSLIST_CMN_20080415_114902 1097.465 1108.577
+CCTV2_NEWSLIST_CMN_20080415_114902 1108.577 1117.375
+CCTV2_NEWSLIST_CMN_20080415_114902 1124.037 1146.487
+CCTV2_NEWSLIST_CMN_20080415_114902 1146.487 1151.900
+CCTV2_NEWSLIST_CMN_20080415_114902 1162.203 1172.328
+CCTV2_NEWSLIST_CMN_20080415_114902 1175.196 1190.445
+CCTV2_NEWSLIST_CMN_20080415_114902 1190.445 1200.068
+CCTV2_NEWSLIST_CMN_20080415_114902 1200.068 1204.330
+CCTV2_NEWSLIST_CMN_20080415_114902 1204.330 1211.175
+CCTV2_NEWSLIST_CMN_20080415_114902 1211.175 1215.930
+CCTV2_NEWSLIST_CMN_20080415_114902 1215.930 1224.975
+CCTV2_NEWSLIST_CMN_20080415_114902 1224.975 1231.217
+CCTV2_NEWSLIST_CMN_20080415_114902 1231.217 1236.786
+CCTV2_NEWSLIST_CMN_20080415_114902 1236.786 1244.407
+CCTV2_NEWSLIST_CMN_20080415_114902 1244.407 1255.256
+CCTV2_NEWSLIST_CMN_20080415_114902 1255.256 1264.259
+CCTV2_NEWSLIST_CMN_20080415_114902 1320.930 1330.010
+CCTV2_NEWSLIST_CMN_20080415_114902 1330.010 1341.163
+CCTV2_NEWSLIST_CMN_20080415_114902 1341.163 1347.423
+CCTV2_NEWSLIST_CMN_20080415_114902 1347.423 1353.456
+CCTV2_NEWSLIST_CMN_20080415_114902 1353.456 1358.986
+CCTV2_NEWSLIST_CMN_20080415_114902 1358.986 1376.555
+CCTV2_NEWSLIST_CMN_20080415_114902 1376.555 1384.025
+CCTV2_NEWSLIST_CMN_20080415_114902 1394.014 1398.897
+CCTV2_NEWSLIST_CMN_20080415_114902 1398.897 1404.581
+CCTV2_NEWSLIST_CMN_20080415_114902 1404.581 1419.769
+CCTV2_NEWSLIST_CMN_20080415_114902 1427.902 1430.744
+CCTV2_NEWSLIST_CMN_20080415_114902 1430.744 1440.194
+CCTV2_NEWSLIST_CMN_20080415_114902 1440.194 1445.664
+CCTV2_NEWSLIST_CMN_20080415_114902 1445.664 1449.509
+CCTV2_NEWSLIST_CMN_20080415_114902 1449.509 1458.310
+CCTV2_NEWSLIST_CMN_20080415_114902 1458.310 1464.952
+CCTV2_NEWSLIST_CMN_20080415_114902 1464.952 1471.922
+CCTV2_NEWSLIST_CMN_20080415_114902 1471.922 1478.460
+CCTV2_NEWSLIST_CMN_20080415_114902 1494.346 1508.019
+CCTV2_NEWSLIST_CMN_20080415_114902 1508.019 1512.594
+CCTV2_NEWSLIST_CMN_20080415_114902 1520.223 1532.542
+CCTV2_NEWSLIST_CMN_20080415_114902 1540.155 1543.749
+CCTV2_NEWSLIST_CMN_20080415_114902 1543.749 1561.353
+CCTV2_NEWSLIST_CMN_20080415_114902 1561.353 1566.587
+CCTV2_NEWSLIST_CMN_20080415_114902 1572.348 1577.958
+CCTV2_NEWSLIST_CMN_20080415_114902 1577.958 1583.535
+CCTV2_NEWSLIST_CMN_20080415_114902 1583.535 1589.441
+CCTV2_NEWSLIST_CMN_20080415_114902 1589.441 1595.144
+CCTV2_NEWSLIST_CMN_20080415_114902 1595.144 1605.508
+CCTV2_NEWSLIST_CMN_20080415_114902 1605.508 1614.963
+CCTV2_NEWSLIST_CMN_20080415_114902 1614.963 1620.046
+CCTV2_NEWSLIST_CMN_20080415_114902 1620.046 1621.390
+CCTV2_NEWSLIST_CMN_20080415_114902 1621.390 1629.030
+CCTV2_NEWSLIST_CMN_20080415_114902 1629.030 1638.764
+CCTV2_NEWSLIST_CMN_20080415_114902 1643.812 1649.619
+CCTV2_NEWSLIST_CMN_20080415_114902 1649.619 1652.682
+CCTV2_NEWSLIST_CMN_20080415_114902 1652.682 1661.793
+CCTV2_NEWSLIST_CMN_20080415_114902 1661.793 1664.012
+CCTV2_NEWSLIST_CMN_20080415_114902 1664.012 1671.860
+CCTV2_NEWSLIST_CMN_20080415_114902 1671.860 1679.439
+CCTV2_NEWSLIST_CMN_20080415_114902 1679.439 1685.252
+CCTV2_NEWSLIST_CMN_20080415_114902 1685.252 1695.327
+CCTV2_NEWSLIST_CMN_20080415_114902 1708.134 1715.560
+CCTV2_NEWSLIST_CMN_20080415_114902 1715.560 1726.357
+CCTV2_NEWSLIST_CMN_20080415_114902 1726.357 1739.099
+CCTV2_NEWSLIST_CMN_20080415_114902 1739.099 1749.674
+CCTV2_NEWSLIST_CMN_20080415_114902 1749.674 1754.988
+CCTV2_NEWSLIST_CMN_20080415_114902 1754.988 1761.675
+CCTV2_NEWSLIST_CMN_20080415_114902 1761.675 1768.414
+CCTV2_NEWSLIST_CMN_20080415_114902 1768.414 1776.374
+CCTV2_NEWSLIST_CMN_20080415_114902 1776.374 1780.460
+CCTV2_NEWSLIST_CMN_20080415_114902 1780.460 1783.503
+CCTV2_NEWSLIST_CMN_20080415_114902 1783.503 1789.987
+CCTV2_NEWSLIST_CMN_20080415_114902 1789.987 1801.730
+CCTV2_NEWSLIST_CMN_20080415_114902 1801.730 1812.108
+CCTV2_NEWSLIST_CMN_20080415_114902 1812.108 1816.140
+CCTV2_NEWSLIST_CMN_20080415_114902 1816.140 1825.931
+CCTV2_NEWSLIST_CMN_20080415_114902 1825.931 1843.068
+CCTV2_NEWSLIST_CMN_20080415_114902 1843.068 1856.399
+CCTV2_NEWSLIST_CMN_20080415_114902 1856.399 1860.242
+CCTV2_NEWSLIST_CMN_20080415_114902 1860.242 1865.854
+CCTV2_NEWSLIST_CMN_20080415_114902 1865.854 1874.071
+CCTV2_NEWSLIST_CMN_20080415_114902 1874.071 1881.684
+CCTV2_NEWSLIST_CMN_20080415_114902 1881.684 1887.981
+CCTV2_NEWSLIST_CMN_20080415_114902 1887.981 1892.840
+CCTV2_NEWSLIST_CMN_20080415_114902 1892.840 1899.024
+CCTV2_NEWSLIST_CMN_20080415_114902 1899.024 1902.085
+CCTV2_NEWSLIST_CMN_20080415_114902 1955.315 1964.721
+CCTV2_NEWSLIST_CMN_20080415_114902 1964.721 1971.744
+CCTV2_NEWSLIST_CMN_20080415_114902 1971.744 1975.479
+CCTV2_NEWSLIST_CMN_20080415_114902 1995.780 2003.435
+CCTV2_NEWSLIST_CMN_20080415_114902 2019.123 2023.966
+CCTV2_NEWSLIST_CMN_20080415_114902 2023.966 2031.920
+CCTV2_NEWSLIST_CMN_20080415_114902 2031.920 2041.007
+CCTV2_NEWSLIST_CMN_20080415_114902 2041.007 2047.913
+CCTV2_NEWSLIST_CMN_20080415_114902 2047.913 2055.871
+CCTV2_NEWSLIST_CMN_20080415_114902 2055.871 2065.637
+CCTV2_NEWSLIST_CMN_20080415_114902 2084.090 2092.076
+CCTV2_NEWSLIST_CMN_20080415_114902 2092.076 2107.907
+CCTV2_NEWSLIST_CMN_20080415_114902 2107.907 2115.867
+CCTV2_NEWSLIST_CMN_20080415_114902 2115.867 2127.240
+CCTV2_NEWSLIST_CMN_20080415_114902 2127.240 2128.881
+CCTV2_NEWSLIST_CMN_20080415_114902 2141.889 2149.788
+CCTV2_NEWSLIST_CMN_20080415_114902 2158.202 2168.465
+CCTV2_NEWSLIST_CMN_20080415_114902 2168.465 2181.563
+CCTV2_NEWSLIST_CMN_20080415_114902 2181.563 2184.971
+CCTV2_NEWSLIST_CMN_20080415_114902 2184.971 2187.916
+CCTV2_NEWSLIST_CMN_20080415_114902 2196.459 2200.716
+CCTV2_NEWSLIST_CMN_20080415_114902 2208.655 2212.497
+CCTV2_NEWSLIST_CMN_20080415_114902 2212.497 2222.233
+CCTV2_NEWSLIST_CMN_20080415_114902 2232.969 2239.828
+CCTV2_NEWSLIST_CMN_20080415_114902 2239.828 2247.208
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 212.973 218.392
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 235.071 249.733
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 301.108 320.476
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 320.476 349.126
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 349.126 378.734
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 378.734 388.225
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 418.399 435.853
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 446.061 456.121
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 456.121 469.839
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 498.626 511.021
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 532.605 555.788
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 555.788 576.194
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 643.013 655.558
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 686.449 706.596
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 726.144 737.551
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 748.843 762.548
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 885.263 896.371
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 954.682 966.060
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 989.696 997.660
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1012.148 1022.118
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1022.118 1052.295
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1068.699 1082.442
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1082.442 1093.131
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1093.131 1105.287
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1105.287 1116.095
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1116.095 1133.787
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1158.381 1183.877
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1183.877 1200.015
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1200.015 1228.134
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1228.134 1241.963
+CCTVNEWS_EVENINGNEWS_CMN_20080330_225702 1241.963 1257.472
+CCTV2_ECON30MIN_CMN_20080426_213501 28.410 33.941
+CCTV2_ECON30MIN_CMN_20080426_213501 39.370 46.871
+CCTV2_ECON30MIN_CMN_20080426_213501 50.293 52.980
+CCTV2_ECON30MIN_CMN_20080426_213501 231.537 233.662
+CCTV2_ECON30MIN_CMN_20080426_213501 233.662 238.224
+CCTV2_ECON30MIN_CMN_20080426_213501 238.224 247.521
+CCTV2_ECON30MIN_CMN_20080426_213501 247.521 257.755
+CCTV2_ECON30MIN_CMN_20080426_213501 257.755 266.036
+CCTV2_ECON30MIN_CMN_20080426_213501 266.036 276.957
+CCTV2_ECON30MIN_CMN_20080426_213501 289.346 294.549
+CCTV2_ECON30MIN_CMN_20080426_213501 309.678 326.615
+CCTV2_ECON30MIN_CMN_20080426_213501 333.568 344.084
+CCTV2_ECON30MIN_CMN_20080426_213501 344.084 349.272
+CCTV2_ECON30MIN_CMN_20080426_213501 349.272 355.538
+CCTV2_ECON30MIN_CMN_20080426_213501 355.538 359.757
+CCTV2_ECON30MIN_CMN_20080426_213501 359.757 365.303
+CCTV2_ECON30MIN_CMN_20080426_213501 365.303 372.037
+CCTV2_ECON30MIN_CMN_20080426_213501 397.804 416.102
+CCTV2_ECON30MIN_CMN_20080426_213501 416.102 427.665
+CCTV2_ECON30MIN_CMN_20080426_213501 447.602 452.789
+CCTV2_ECON30MIN_CMN_20080426_213501 464.497 468.310
+CCTV2_ECON30MIN_CMN_20080426_213501 468.310 479.920
+CCTV2_ECON30MIN_CMN_20080426_213501 479.920 490.731
+CCTV2_ECON30MIN_CMN_20080426_213501 490.731 498.571
+CCTV2_ECON30MIN_CMN_20080426_213501 501.821 510.134
+CCTV2_ECON30MIN_CMN_20080426_213501 510.134 515.025
+CCTV2_ECON30MIN_CMN_20080426_213501 523.243 529.915
+CCTV2_ECON30MIN_CMN_20080426_213501 529.915 536.102
+CCTV2_ECON30MIN_CMN_20080426_213501 540.079 545.095
+CCTV2_ECON30MIN_CMN_20080426_213501 558.517 562.376
+CCTV2_ECON30MIN_CMN_20080426_213501 580.438 586.000
+CCTV2_ECON30MIN_CMN_20080426_213501 586.000 602.258
+CCTV2_ECON30MIN_CMN_20080426_213501 602.258 609.677
+CCTV2_ECON30MIN_CMN_20080426_213501 609.677 617.571
+CCTV2_ECON30MIN_CMN_20080426_213501 617.571 625.258
+CCTV2_ECON30MIN_CMN_20080426_213501 625.258 633.961
+CCTV2_ECON30MIN_CMN_20080426_213501 633.961 637.555
+CCTV2_ECON30MIN_CMN_20080426_213501 670.955 672.534
+CCTV2_ECON30MIN_CMN_20080426_213501 793.630 795.083
+CCTV2_ECON30MIN_CMN_20080426_213501 816.223 827.926
+CCTV2_ECON30MIN_CMN_20080426_213501 827.926 832.903
+CCTV2_ECON30MIN_CMN_20080426_213501 832.903 840.122
+CCTV2_ECON30MIN_CMN_20080426_213501 843.859 846.876
+CCTV2_ECON30MIN_CMN_20080426_213501 846.876 849.173
+CCTV2_ECON30MIN_CMN_20080426_213501 849.173 854.803
+CCTV2_ECON30MIN_CMN_20080426_213501 861.365 863.990
+CCTV2_ECON30MIN_CMN_20080426_213501 871.443 880.087
+CCTV2_ECON30MIN_CMN_20080426_213501 880.087 883.212
+CCTV2_ECON30MIN_CMN_20080426_213501 883.212 886.197
+CCTV2_ECON30MIN_CMN_20080426_213501 886.197 895.557
+CCTV2_ECON30MIN_CMN_20080426_213501 895.557 900.979
+CCTV2_ECON30MIN_CMN_20080426_213501 900.979 908.010
+CCTV2_ECON30MIN_CMN_20080426_213501 908.010 914.822
+CCTV2_ECON30MIN_CMN_20080426_213501 914.822 923.994
+CCTV2_ECON30MIN_CMN_20080426_213501 923.994 928.182
+CCTV2_ECON30MIN_CMN_20080426_213501 928.182 933.947
+CCTV2_ECON30MIN_CMN_20080426_213501 938.315 941.189
+CCTV2_ECON30MIN_CMN_20080426_213501 941.189 942.954
+CCTV2_ECON30MIN_CMN_20080426_213501 942.954 947.157
+CCTV2_ECON30MIN_CMN_20080426_213501 942.954 947.157
+CCTV2_ECON30MIN_CMN_20080426_213501 947.157 952.610
+CCTV2_ECON30MIN_CMN_20080426_213501 963.970 967.298
+CCTV2_ECON30MIN_CMN_20080426_213501 967.298 968.657
+CCTV2_ECON30MIN_CMN_20080426_213501 968.657 969.720
+CCTV2_ECON30MIN_CMN_20080426_213501 969.720 971.939
+CCTV2_ECON30MIN_CMN_20080426_213501 979.225 996.490
+CCTV2_ECON30MIN_CMN_20080426_213501 996.490 1001.599
+CCTV2_ECON30MIN_CMN_20080426_213501 1001.599 1012.756
+CCTV2_ECON30MIN_CMN_20080426_213501 1012.756 1019.584
+CCTV2_ECON30MIN_CMN_20080426_213501 1025.537 1033.631
+CCTV2_ECON30MIN_CMN_20080426_213501 1042.006 1046.849
+CCTV2_ECON30MIN_CMN_20080426_213501 1056.069 1066.585
+CCTV2_ECON30MIN_CMN_20080426_213501 1080.184 1093.482
+CCTV2_ECON30MIN_CMN_20080426_213501 1096.039 1114.914
+CCTV2_ECON30MIN_CMN_20080426_213501 1123.039 1132.914
+CCTV2_ECON30MIN_CMN_20080426_213501 1148.338 1159.682
+CCTV2_ECON30MIN_CMN_20080426_213501 1159.682 1167.212
+CCTV2_ECON30MIN_CMN_20080426_213501 1167.212 1170.946
+CCTV2_ECON30MIN_CMN_20080426_213501 1172.555 1173.212
+CCTV2_ECON30MIN_CMN_20080426_213501 1173.212 1180.814
+CCTV2_ECON30MIN_CMN_20080426_213501 1180.814 1186.893
+CCTV2_ECON30MIN_CMN_20080426_213501 1189.456 1200.659
+CCTV2_ECON30MIN_CMN_20080426_213501 1200.659 1211.393
+CCTV2_ECON30MIN_CMN_20080426_213501 1211.393 1229.254
+CCTV2_ECON30MIN_CMN_20080426_213501 1363.474 1364.771
+CCTV2_ECON30MIN_CMN_20080426_213501 1364.771 1374.880
+CCTV2_ECON30MIN_CMN_20080426_213501 1374.880 1382.661
+CCTV2_ECON30MIN_CMN_20080426_213501 1382.661 1392.458
+CCTV2_ECON30MIN_CMN_20080426_213501 1392.458 1409.020
+CCTV2_ECON30MIN_CMN_20080426_213501 1422.069 1431.476
+CCTV2_ECON30MIN_CMN_20080426_213501 1436.079 1437.594
+CCTV2_ECON30MIN_CMN_20080426_213501 1441.390 1449.000
+CCTV2_ECON30MIN_CMN_20080426_213501 1449.000 1456.985
+CCTV2_ECON30MIN_CMN_20080426_213501 1465.672 1486.121
+CCTV2_ECON30MIN_CMN_20080426_213501 1498.824 1508.243
+CCTV2_ECON30MIN_CMN_20080426_213501 1508.243 1514.914
+CCTV2_ECON30MIN_CMN_20080426_213501 1533.164 1544.664
+CCTV2_ECON30MIN_CMN_20080426_213501 1544.664 1562.154
+CCTV2_ECON30MIN_CMN_20080426_213501 1562.154 1580.066
+CCTV2_ECON30MIN_CMN_20080426_213501 1580.066 1592.035
+CCTV2_ECON30MIN_CMN_20080426_213501 1606.364 1620.586
+VOA_INTNLNEWS_CMN_20080407_210000 239.183 245.480
+VOA_INTNLNEWS_CMN_20080407_210000 245.480 247.152
+VOA_INTNLNEWS_CMN_20080407_210000 247.152 253.550
+VOA_INTNLNEWS_CMN_20080407_210000 253.550 257.726
+VOA_INTNLNEWS_CMN_20080407_210000 257.726 262.127
+VOA_INTNLNEWS_CMN_20080407_210000 268.106 273.032
+VOA_INTNLNEWS_CMN_20080407_210000 283.870 286.980
+VOA_INTNLNEWS_CMN_20080407_210000 286.980 291.308
+VOA_INTNLNEWS_CMN_20080407_210000 291.308 292.933
+VOA_INTNLNEWS_CMN_20080407_210000 292.933 302.099
+VOA_INTNLNEWS_CMN_20080407_210000 302.099 308.238
+VOA_INTNLNEWS_CMN_20080407_210000 316.189 318.830
+VOA_INTNLNEWS_CMN_20080407_210000 318.830 329.857
+VOA_INTNLNEWS_CMN_20080407_210000 329.857 347.078
+VOA_INTNLNEWS_CMN_20080407_210000 358.454 365.466
+VOA_INTNLNEWS_CMN_20080407_210000 374.472 378.239
+VOA_INTNLNEWS_CMN_20080407_210000 378.239 384.946
+VOA_INTNLNEWS_CMN_20080407_210000 384.946 390.880
+VOA_INTNLNEWS_CMN_20080407_210000 390.880 401.736
+VOA_INTNLNEWS_CMN_20080407_210000 416.156 422.324
+VOA_INTNLNEWS_CMN_20080407_210000 431.371 438.478
+VOA_INTNLNEWS_CMN_20080407_210000 438.478 443.602
+VOA_INTNLNEWS_CMN_20080407_210000 460.181 464.403
+VOA_INTNLNEWS_CMN_20080407_210000 464.403 473.719
+VOA_INTNLNEWS_CMN_20080407_210000 483.454 487.642
+VOA_INTNLNEWS_CMN_20080407_210000 487.642 491.330
+VOA_INTNLNEWS_CMN_20080407_210000 498.190 505.221
+VOA_INTNLNEWS_CMN_20080407_210000 529.783 542.517
+VOA_INTNLNEWS_CMN_20080407_210000 542.517 550.851
+VOA_INTNLNEWS_CMN_20080407_210000 558.289 568.778
+VOA_INTNLNEWS_CMN_20080407_210000 568.778 576.466
+VOA_INTNLNEWS_CMN_20080407_210000 576.466 585.133
+VOA_INTNLNEWS_CMN_20080407_210000 585.133 588.517
+VOA_INTNLNEWS_CMN_20080407_210000 596.631 602.859
+VOA_INTNLNEWS_CMN_20080407_210000 602.859 608.984
+VOA_INTNLNEWS_CMN_20080407_210000 608.984 611.889
+VOA_INTNLNEWS_CMN_20080407_210000 619.451 629.780
+VOA_INTNLNEWS_CMN_20080407_210000 629.780 637.812
+VOA_INTNLNEWS_CMN_20080407_210000 637.812 643.069
+VOA_INTNLNEWS_CMN_20080407_210000 643.069 651.321
+VOA_INTNLNEWS_CMN_20080407_210000 666.838 672.101
+VOA_INTNLNEWS_CMN_20080407_210000 672.101 676.726
+VOA_INTNLNEWS_CMN_20080407_210000 680.382 707.262
+VOA_INTNLNEWS_CMN_20080407_210000 707.262 719.937
+VOA_INTNLNEWS_CMN_20080407_210000 719.937 723.671
+VOA_INTNLNEWS_CMN_20080407_210000 723.671 737.076
+VOA_INTNLNEWS_CMN_20080407_210000 737.076 741.295
+VOA_INTNLNEWS_CMN_20080407_210000 755.427 763.332
+VOA_INTNLNEWS_CMN_20080407_210000 773.364 782.447
+VOA_INTNLNEWS_CMN_20080407_210000 782.447 791.792
+VOA_INTNLNEWS_CMN_20080407_210000 805.239 807.598
+VOA_INTNLNEWS_CMN_20080407_210000 807.598 814.402
+VOA_INTNLNEWS_CMN_20080407_210000 814.402 821.717
+VOA_INTNLNEWS_CMN_20080407_210000 821.717 829.388
+VOA_INTNLNEWS_CMN_20080407_210000 841.458 850.089
+VOA_INTNLNEWS_CMN_20080407_210000 850.089 858.083
+VOA_INTNLNEWS_CMN_20080407_210000 874.398 883.260
+VOA_INTNLNEWS_CMN_20080407_210000 883.260 888.318
+VOA_INTNLNEWS_CMN_20080407_210000 888.318 899.500
+VOA_INTNLNEWS_CMN_20080407_210000 899.500 908.781
+VOA_INTNLNEWS_CMN_20080407_210000 908.781 911.891
+VOA_INTNLNEWS_CMN_20080407_210000 921.144 927.503
+VOA_INTNLNEWS_CMN_20080407_210000 927.503 929.784
+VOA_INTNLNEWS_CMN_20080407_210000 929.784 935.344
+VOA_INTNLNEWS_CMN_20080407_210000 939.610 940.938
+VOA_INTNLNEWS_CMN_20080407_210000 940.938 954.642
+VOA_INTNLNEWS_CMN_20080407_210000 954.642 960.000
+CCTV1_30MINNEWS_CMN_20080401_115901 85.222 89.924
+CCTV1_30MINNEWS_CMN_20080401_115901 89.924 95.829
+CCTV1_30MINNEWS_CMN_20080401_115901 95.829 97.422
+CCTV1_30MINNEWS_CMN_20080401_115901 102.422 104.000
+CCTV1_30MINNEWS_CMN_20080401_115901 104.000 105.875
+CCTV1_30MINNEWS_CMN_20080401_115901 109.032 118.016
+CCTV1_30MINNEWS_CMN_20080401_115901 118.016 126.157
+CCTV1_30MINNEWS_CMN_20080401_115901 136.095 145.486
+CCTV1_30MINNEWS_CMN_20080401_115901 145.486 150.205
+CCTV1_30MINNEWS_CMN_20080401_115901 150.205 154.564
+CCTV1_30MINNEWS_CMN_20080401_115901 154.564 163.595
+CCTV1_30MINNEWS_CMN_20080401_115901 163.595 170.470
+CCTV1_30MINNEWS_CMN_20080401_115901 170.470 188.345
+CCTV1_30MINNEWS_CMN_20080401_115901 188.345 198.344
+CCTV1_30MINNEWS_CMN_20080401_115901 203.819 213.616
+CCTV1_30MINNEWS_CMN_20080401_115901 220.490 232.554
+CCTV1_30MINNEWS_CMN_20080401_115901 246.908 250.142
+CCTV1_30MINNEWS_CMN_20080401_115901 250.142 255.127
+CCTV1_30MINNEWS_CMN_20080401_115901 302.818 317.366
+CCTV1_30MINNEWS_CMN_20080401_115901 317.366 327.835
+CCTV1_30MINNEWS_CMN_20080401_115901 327.835 343.663
+CCTV1_30MINNEWS_CMN_20080401_115901 343.663 363.819
+CCTV1_30MINNEWS_CMN_20080401_115901 363.819 371.726
+CCTV1_30MINNEWS_CMN_20080401_115901 378.585 392.147
+CCTV1_30MINNEWS_CMN_20080401_115901 408.912 413.584
+CCTV1_30MINNEWS_CMN_20080401_115901 419.818 425.162
+CCTV1_30MINNEWS_CMN_20080401_115901 456.905 466.562
+CCTV1_30MINNEWS_CMN_20080401_115901 466.562 470.655
+CCTV1_30MINNEWS_CMN_20080401_115901 529.681 534.759
+CCTV1_30MINNEWS_CMN_20080401_115901 547.275 559.868
+CCTV1_30MINNEWS_CMN_20080401_115901 565.837 571.415
+CCTV1_30MINNEWS_CMN_20080401_115901 571.415 577.602
+CCTV1_30MINNEWS_CMN_20080401_115901 577.602 585.805
+CCTV1_30MINNEWS_CMN_20080401_115901 585.805 597.804
+CCTV1_30MINNEWS_CMN_20080401_115901 597.804 607.071
+CCTV1_30MINNEWS_CMN_20080401_115901 607.071 616.259
+CCTV1_30MINNEWS_CMN_20080401_115901 624.491 626.413
+CCTV1_30MINNEWS_CMN_20080401_115901 636.069 645.226
+CCTV1_30MINNEWS_CMN_20080401_115901 645.226 649.758
+CCTV1_30MINNEWS_CMN_20080401_115901 649.758 659.382
+CCTV1_30MINNEWS_CMN_20080401_115901 659.382 671.784
+CCTV1_30MINNEWS_CMN_20080401_115901 671.784 677.556
+CCTV1_30MINNEWS_CMN_20080401_115901 677.556 684.025
+CCTV1_30MINNEWS_CMN_20080401_115901 707.748 713.936
+CCTV1_30MINNEWS_CMN_20080401_115901 713.936 718.764
+CCTV1_30MINNEWS_CMN_20080401_115901 723.544 731.932
+CCTV1_30MINNEWS_CMN_20080401_115901 731.932 744.026
+CCTV1_30MINNEWS_CMN_20080401_115901 744.026 754.933
+CCTV1_30MINNEWS_CMN_20080401_115901 754.933 764.152
+CCTV1_30MINNEWS_CMN_20080401_115901 764.152 773.428
+CCTV1_30MINNEWS_CMN_20080401_115901 820.385 831.979
+CCTV1_30MINNEWS_CMN_20080401_115901 841.400 851.994
+CCTV1_30MINNEWS_CMN_20080401_115901 851.994 862.322
+CCTV1_30MINNEWS_CMN_20080401_115901 862.322 874.791
+CCTV1_30MINNEWS_CMN_20080401_115901 874.791 885.995
+CCTV1_30MINNEWS_CMN_20080401_115901 885.995 889.370
+CCTV1_30MINNEWS_CMN_20080401_115901 892.855 901.621
+CCTV1_30MINNEWS_CMN_20080401_115901 911.893 914.710
+CCTV1_30MINNEWS_CMN_20080401_115901 911.893 914.710
+CCTV1_30MINNEWS_CMN_20080401_115901 914.710 917.397
+CCTV1_30MINNEWS_CMN_20080401_115901 917.397 924.053
+CCTV1_30MINNEWS_CMN_20080401_115901 931.038 936.263
+CCTV1_30MINNEWS_CMN_20080401_115901 931.038 936.263
+CCTV1_30MINNEWS_CMN_20080401_115901 936.263 939.179
+CCTV1_30MINNEWS_CMN_20080401_115901 939.179 945.444
+CCTV1_30MINNEWS_CMN_20080401_115901 945.444 947.240
+CCTV1_30MINNEWS_CMN_20080401_115901 947.240 953.901
+CCTV1_30MINNEWS_CMN_20080401_115901 953.901 963.386
+CCTV1_30MINNEWS_CMN_20080401_115901 963.386 968.344
+CCTV1_30MINNEWS_CMN_20080401_115901 968.344 970.704
+CCTV1_30MINNEWS_CMN_20080401_115901 970.704 978.282
+CCTV1_30MINNEWS_CMN_20080401_115901 978.282 988.954
+CCTV1_30MINNEWS_CMN_20080401_115901 1005.032 1013.407
+CCTV1_30MINNEWS_CMN_20080401_115901 1013.407 1024.837
+CCTV1_30MINNEWS_CMN_20080401_115901 1024.837 1034.786
+CCTV1_30MINNEWS_CMN_20080401_115901 1034.786 1040.630
+CCTV1_30MINNEWS_CMN_20080401_115901 1040.630 1046.676
+CCTV1_30MINNEWS_CMN_20080401_115901 1046.676 1056.661
+CCTV1_30MINNEWS_CMN_20080401_115901 1069.892 1075.122
+CCTV1_30MINNEWS_CMN_20080401_115901 1075.122 1079.746
+CCTV1_30MINNEWS_CMN_20080401_115901 1079.746 1091.308
+CCTV1_30MINNEWS_CMN_20080401_115901 1091.308 1098.371
+CCTV1_30MINNEWS_CMN_20080401_115901 1098.371 1101.075
+CCTV1_30MINNEWS_CMN_20080401_115901 1101.075 1108.388
+CCTV1_30MINNEWS_CMN_20080401_115901 1108.388 1119.623
+CCTV1_30MINNEWS_CMN_20080401_115901 1119.623 1126.187
+CCTV1_30MINNEWS_CMN_20080401_115901 1140.828 1146.876
+CCTV1_30MINNEWS_CMN_20080401_115901 1267.391 1269.171
+CCTV1_30MINNEWS_CMN_20080401_115901 1282.216 1294.325
+CCTV1_30MINNEWS_CMN_20080401_115901 1294.325 1307.232
+CCTV1_30MINNEWS_CMN_20080401_115901 1313.482 1318.857
+CCTV1_30MINNEWS_CMN_20080401_115901 1342.153 1349.573
+CCTV1_30MINNEWS_CMN_20080401_115901 1349.573 1364.808
+CCTV1_30MINNEWS_CMN_20080401_115901 1364.808 1376.246
+CCTV1_30MINNEWS_CMN_20080401_115901 1376.246 1387.886
+CCTV1_30MINNEWS_CMN_20080401_115901 1410.499 1418.593
+CCTV1_30MINNEWS_CMN_20080401_115901 1418.593 1426.749
+CCTV1_30MINNEWS_CMN_20080401_115901 1426.749 1431.161
+CCTV1_30MINNEWS_CMN_20080401_115901 1443.590 1450.496
+CCTV1_30MINNEWS_CMN_20080401_115901 1450.496 1465.137
+CCTV1_30MINNEWS_CMN_20080401_115901 1465.137 1467.012
+CCTV1_30MINNEWS_CMN_20080401_115901 1467.012 1482.019
+CCTV1_30MINNEWS_CMN_20080401_115901 1489.660 1491.582
+CCTV1_30MINNEWS_CMN_20080401_115901 1491.582 1504.181
+CCTV1_30MINNEWS_CMN_20080401_115901 1519.757 1522.929
+CCTV1_30MINNEWS_CMN_20080401_115901 1534.820 1544.318
+CCTV1_30MINNEWS_CMN_20080401_115901 1568.481 1580.521
+CCTV1_30MINNEWS_CMN_20080401_115901 1580.521 1592.336
+CCTV1_30MINNEWS_CMN_20080401_115901 1592.336 1603.430
+CCTV1_30MINNEWS_CMN_20080401_115901 1603.430 1612.086
+CCTV1_30MINNEWS_CMN_20080401_115901 1612.086 1620.405
+CCTV1_30MINNEWS_CMN_20080401_115901 1620.405 1634.986
+CCTV1_30MINNEWS_CMN_20080401_115901 1840.741 1843.569
+CCTV1_30MINNEWS_CMN_20080401_115901 1844.929 1845.460
+CCTV2_ECON30MIN_CMN_20080411_213502 226.331 228.722
+CCTV2_ECON30MIN_CMN_20080411_213502 228.722 252.270
+CCTV2_ECON30MIN_CMN_20080411_213502 252.270 260.640
+CCTV2_ECON30MIN_CMN_20080411_213502 260.640 266.796
+CCTV2_ECON30MIN_CMN_20080411_213502 266.796 270.624
+CCTV2_ECON30MIN_CMN_20080411_213502 270.624 273.249
+CCTV2_ECON30MIN_CMN_20080411_213502 273.249 289.630
+CCTV2_ECON30MIN_CMN_20080411_213502 289.630 292.248
+CCTV2_ECON30MIN_CMN_20080411_213502 292.248 317.922
+CCTV2_ECON30MIN_CMN_20080411_213502 317.922 324.940
+CCTV2_ECON30MIN_CMN_20080411_213502 324.940 328.652
+CCTV2_ECON30MIN_CMN_20080411_213502 328.652 330.839
+CCTV2_ECON30MIN_CMN_20080411_213502 330.839 337.152
+CCTV2_ECON30MIN_CMN_20080411_213502 337.152 352.512
+CCTV2_ECON30MIN_CMN_20080411_213502 352.512 370.100
+CCTV2_ECON30MIN_CMN_20080411_213502 370.100 378.838
+CCTV2_ECON30MIN_CMN_20080411_213502 378.838 387.213
+CCTV2_ECON30MIN_CMN_20080411_213502 387.213 402.945
+CCTV2_ECON30MIN_CMN_20080411_213502 414.951 419.264
+CCTV2_ECON30MIN_CMN_20080411_213502 419.264 436.465
+CCTV2_ECON30MIN_CMN_20080411_213502 436.465 459.294
+CCTV2_ECON30MIN_CMN_20080411_213502 459.294 469.013
+CCTV2_ECON30MIN_CMN_20080411_213502 469.013 474.887
+CCTV2_ECON30MIN_CMN_20080411_213502 474.887 484.437
+CCTV2_ECON30MIN_CMN_20080411_213502 484.437 492.967
+CCTV2_ECON30MIN_CMN_20080411_213502 504.045 511.576
+CCTV2_ECON30MIN_CMN_20080411_213502 511.576 525.608
+CCTV2_ECON30MIN_CMN_20080411_213502 525.608 529.438
+CCTV2_ECON30MIN_CMN_20080411_213502 529.438 539.031
+CCTV2_ECON30MIN_CMN_20080411_213502 539.031 553.022
+CCTV2_ECON30MIN_CMN_20080411_213502 563.708 572.630
+CCTV2_ECON30MIN_CMN_20080411_213502 572.630 585.004
+CCTV2_ECON30MIN_CMN_20080411_213502 585.004 600.690
+CCTV2_ECON30MIN_CMN_20080411_213502 600.690 609.373
+CCTV2_ECON30MIN_CMN_20080411_213502 609.373 614.607
+CCTV2_ECON30MIN_CMN_20080411_213502 614.607 624.398
+CCTV2_ECON30MIN_CMN_20080411_213502 630.178 640.601
+CCTV2_ECON30MIN_CMN_20080411_213502 640.601 646.211
+CCTV2_ECON30MIN_CMN_20080411_213502 646.211 658.909
+CCTV2_ECON30MIN_CMN_20080411_213502 658.909 668.457
+CCTV2_ECON30MIN_CMN_20080411_213502 668.457 674.442
+CCTV2_ECON30MIN_CMN_20080411_213502 674.442 697.143
+CCTV2_ECON30MIN_CMN_20080411_213502 697.143 703.089
+CCTV2_ECON30MIN_CMN_20080411_213502 703.089 716.961
+CCTV2_ECON30MIN_CMN_20080411_213502 716.961 732.134
+CCTV2_ECON30MIN_CMN_20080411_213502 732.134 749.540
+CCTV2_ECON30MIN_CMN_20080411_213502 749.540 761.493
+CCTV2_ECON30MIN_CMN_20080411_213502 761.493 770.366
+CCTV2_ECON30MIN_CMN_20080411_213502 770.366 778.417
+CCTV2_ECON30MIN_CMN_20080411_213502 926.746 937.355
+CCTV2_ECON30MIN_CMN_20080411_213502 946.260 967.308
+CCTV2_ECON30MIN_CMN_20080411_213502 967.308 976.011
+CCTV2_ECON30MIN_CMN_20080411_213502 976.011 985.306
+CCTV2_ECON30MIN_CMN_20080411_213502 985.306 987.754
+CCTV2_ECON30MIN_CMN_20080411_213502 987.754 990.176
+CCTV2_ECON30MIN_CMN_20080411_213502 993.364 996.443
+CCTV2_ECON30MIN_CMN_20080411_213502 996.443 1014.180
+CCTV2_ECON30MIN_CMN_20080411_213502 1014.180 1018.570
+CCTV2_ECON30MIN_CMN_20080411_213502 1018.570 1023.804
+CCTV2_ECON30MIN_CMN_20080411_213502 1023.804 1034.678
+CCTV2_ECON30MIN_CMN_20080411_213502 1034.678 1039.300
+CCTV2_ECON30MIN_CMN_20080411_213502 1039.300 1047.890
+CCTV2_ECON30MIN_CMN_20080411_213502 1047.890 1056.781
+CCTV2_ECON30MIN_CMN_20080411_213502 1056.781 1066.354
+CCTV2_ECON30MIN_CMN_20080411_213502 1079.666 1085.854
+CCTV2_ECON30MIN_CMN_20080411_213502 1085.854 1100.354
+CCTV2_ECON30MIN_CMN_20080411_213502 1100.354 1112.058
+CCTV2_ECON30MIN_CMN_20080411_213502 1112.058 1118.740
+CCTV2_ECON30MIN_CMN_20080411_213502 1118.740 1137.460
+CCTV2_ECON30MIN_CMN_20080411_213502 1137.460 1143.819
+CCTV2_ECON30MIN_CMN_20080411_213502 1160.526 1166.682
+CCTV2_ECON30MIN_CMN_20080411_213502 1166.682 1176.721
+CCTV2_ECON30MIN_CMN_20080411_213502 1182.048 1195.365
+CCTV2_ECON30MIN_CMN_20080411_213502 1195.365 1202.993
+CCTV2_ECON30MIN_CMN_20080411_213502 1202.993 1210.688
+CCTV2_ECON30MIN_CMN_20080411_213502 1383.569 1402.834
+CCTV2_ECON30MIN_CMN_20080411_213502 1402.834 1411.302
+CCTV2_ECON30MIN_CMN_20080411_213502 1470.714 1477.370
+CCTV2_ECON30MIN_CMN_20080411_213502 1477.370 1501.710
+CCTV2_ECON30MIN_CMN_20080411_213502 1501.710 1510.116
+CCTV2_ECON30MIN_CMN_20080411_213502 1510.116 1535.443
+CCTV2_ECON30MIN_CMN_20080411_213502 1535.443 1562.162
+CCTV2_ECON30MIN_CMN_20080411_213502 1562.162 1567.240
+CCTV2_ECON30MIN_CMN_20080411_213502 1589.601 1597.726
+CCTV2_ECON30MIN_CMN_20080411_213502 1597.726 1606.148
+CCTV2_ECON30MIN_CMN_20080411_213502 1606.148 1614.374
+CCTV2_ECON30MIN_CMN_20080411_213502 1614.374 1620.586
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 234.403 240.551
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 240.551 242.238
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 242.238 250.802
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 250.802 256.965
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 256.965 262.048
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 262.048 264.220
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 284.799 294.158
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 301.192 311.661
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 311.661 323.645
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 343.659 348.395
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 348.395 356.395
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 356.395 364.144
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 364.144 368.363
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 385.957 393.097
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 399.965 409.148
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 413.070 424.258
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 460.726 471.350
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 471.350 481.348
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 489.202 493.732
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 493.732 500.089
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 500.089 510.916
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 510.916 520.229
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 520.229 531.307
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 542.041 553.788
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 566.473 577.397
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 585.944 589.741
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 589.741 600.400
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 609.951 619.279
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 619.279 627.451
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 632.717 640.046
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 640.046 652.514
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 652.514 658.607
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 658.607 668.138
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 668.138 673.029
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 673.029 675.888
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 675.888 685.277
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 695.403 700.793
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 700.793 714.278
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 714.278 725.465
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 746.903 758.388
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 768.904 775.982
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 775.982 790.061
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 800.358 806.921
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 820.892 825.741
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 825.741 838.585
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 838.585 842.990
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 874.229 883.414
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 883.414 891.695
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 891.695 896.804
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 910.211 920.194
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 983.712 996.213
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1007.869 1030.963
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1030.963 1042.526
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1042.526 1059.981
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1059.981 1063.090
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1074.863 1087.003
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1087.003 1097.775
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1110.949 1115.028
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1115.028 1127.872
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1139.673 1148.751
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1148.751 1154.611
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1154.611 1161.048
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1161.048 1169.095
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1169.095 1178.980
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1178.980 1183.199
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1207.789 1220.694
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1230.441 1240.081
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1250.941 1259.676
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1259.676 1274.550
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1277.191 1286.473
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1292.364 1301.536
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1314.103 1329.396
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1329.396 1340.288
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1350.351 1356.914
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1367.806 1382.602
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1387.087 1392.603
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 1392.603 1395.415
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2068.093 2069.921
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2069.921 2078.609
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2089.484 2098.468
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2115.561 2127.229
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2146.852 2151.586
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2151.586 2159.102
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2159.102 2166.461
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2166.461 2171.180
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2189.039 2196.508
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2196.508 2202.852
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2202.852 2212.227
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2212.227 2215.696
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2215.696 2226.915
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2226.915 2238.087
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2250.040 2256.743
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2256.743 2262.649
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2262.649 2272.774
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2272.774 2283.040
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2283.040 2290.884
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2296.352 2304.415
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2304.415 2310.134
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2316.899 2320.621
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2320.621 2322.903
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2322.903 2324.497
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2329.137 2344.855
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2344.855 2351.417
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2351.417 2359.730
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2359.730 2372.872
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2372.872 2382.091
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2391.154 2396.357
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2396.357 2401.403
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2401.403 2414.996
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2425.667 2433.544
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2437.184 2444.524
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2444.524 2457.757
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2457.757 2480.039
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2480.039 2491.806
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2521.898 2534.353
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2534.353 2539.103
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2539.103 2544.055
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2550.086 2561.726
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2561.726 2568.085
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2568.085 2572.866
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2572.866 2591.135
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2591.135 2605.054
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2605.054 2611.399
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2611.399 2617.197
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2617.197 2623.411
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2646.654 2663.341
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2677.645 2697.408
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2697.408 2701.424
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2701.424 2707.034
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2721.551 2739.751
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2739.751 2749.853
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2749.853 2768.477
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2768.477 2771.836
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2781.421 2788.695
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2788.695 2799.925
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2799.925 2808.614
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2822.424 2842.392
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2915.220 2924.248
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2939.951 2947.748
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2947.748 2954.068
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2961.457 2970.768
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2970.768 2985.440
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 2985.440 2993.425
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3037.420 3049.810
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3049.810 3057.092
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3057.092 3067.842
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3067.842 3087.422
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3107.811 3124.326
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3158.343 3169.749
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3169.749 3196.089
+VOA_CURRENTEVENTSMORNING_CMN_20080405_090000 3203.200 3207.825
+CCTV1_30MINNEWS_CMN_20080328_115902 73.585 78.148
+CCTV1_30MINNEWS_CMN_20080328_115902 89.847 97.422
+CCTV1_30MINNEWS_CMN_20080328_115902 97.422 98.704
+CCTV1_30MINNEWS_CMN_20080328_115902 101.281 102.766
+CCTV1_30MINNEWS_CMN_20080328_115902 102.766 105.297
+CCTV1_30MINNEWS_CMN_20080328_115902 147.171 160.264
+CCTV1_30MINNEWS_CMN_20080328_115902 160.264 170.358
+CCTV1_30MINNEWS_CMN_20080328_115902 175.546 183.406
+CCTV1_30MINNEWS_CMN_20080328_115902 215.554 226.321
+CCTV1_30MINNEWS_CMN_20080328_115902 226.321 236.103
+CCTV1_30MINNEWS_CMN_20080328_115902 236.103 244.665
+CCTV1_30MINNEWS_CMN_20080328_115902 264.945 275.477
+CCTV1_30MINNEWS_CMN_20080328_115902 275.477 294.895
+CCTV1_30MINNEWS_CMN_20080328_115902 311.243 323.868
+CCTV1_30MINNEWS_CMN_20080328_115902 323.868 331.415
+CCTV1_30MINNEWS_CMN_20080328_115902 358.102 363.400
+CCTV1_30MINNEWS_CMN_20080328_115902 363.400 374.715
+CCTV1_30MINNEWS_CMN_20080328_115902 374.715 379.387
+CCTV1_30MINNEWS_CMN_20080328_115902 379.387 398.543
+CCTV1_30MINNEWS_CMN_20080328_115902 398.543 408.597
+CCTV1_30MINNEWS_CMN_20080328_115902 408.597 423.112
+CCTV1_30MINNEWS_CMN_20080328_115902 439.284 450.939
+CCTV1_30MINNEWS_CMN_20080328_115902 450.939 460.580
+CCTV1_30MINNEWS_CMN_20080328_115902 460.580 465.071
+CCTV1_30MINNEWS_CMN_20080328_115902 492.309 508.319
+CCTV1_30MINNEWS_CMN_20080328_115902 508.319 526.023
+CCTV1_30MINNEWS_CMN_20080328_115902 526.023 533.242
+CCTV1_30MINNEWS_CMN_20080328_115902 533.242 541.180
+CCTV1_30MINNEWS_CMN_20080328_115902 545.978 549.009
+CCTV1_30MINNEWS_CMN_20080328_115902 555.837 561.697
+CCTV1_30MINNEWS_CMN_20080328_115902 587.590 590.840
+CCTV1_30MINNEWS_CMN_20080328_115902 663.955 675.572
+CCTV1_30MINNEWS_CMN_20080328_115902 703.216 716.623
+CCTV1_30MINNEWS_CMN_20080328_115902 770.585 781.493
+CCTV1_30MINNEWS_CMN_20080328_115902 792.062 798.172
+CCTV1_30MINNEWS_CMN_20080328_115902 798.172 808.082
+CCTV1_30MINNEWS_CMN_20080328_115902 808.082 827.097
+CCTV1_30MINNEWS_CMN_20080328_115902 827.097 836.316
+CCTV1_30MINNEWS_CMN_20080328_115902 836.316 845.067
+CCTV1_30MINNEWS_CMN_20080328_115902 845.067 850.661
+CCTV1_30MINNEWS_CMN_20080328_115902 850.661 859.928
+CCTV1_30MINNEWS_CMN_20080328_115902 859.928 869.332
+CCTV1_30MINNEWS_CMN_20080328_115902 869.332 877.643
+CCTV1_30MINNEWS_CMN_20080328_115902 893.080 903.055
+CCTV1_30MINNEWS_CMN_20080328_115902 903.055 910.477
+CCTV1_30MINNEWS_CMN_20080328_115902 910.477 922.368
+CCTV1_30MINNEWS_CMN_20080328_115902 1038.231 1048.715
+CCTV1_30MINNEWS_CMN_20080328_115902 1048.715 1057.403
+CCTV1_30MINNEWS_CMN_20080328_115902 1065.105 1073.324
+CCTV1_30MINNEWS_CMN_20080328_115902 1073.324 1080.652
+CCTV1_30MINNEWS_CMN_20080328_115902 1103.562 1111.328
+CCTV1_30MINNEWS_CMN_20080328_115902 1122.832 1133.020
+CCTV1_30MINNEWS_CMN_20080328_115902 1133.020 1135.973
+CCTV1_30MINNEWS_CMN_20080328_115902 1178.629 1185.410
+CCTV1_30MINNEWS_CMN_20080328_115902 1185.410 1189.379
+CCTV1_30MINNEWS_CMN_20080328_115902 1196.254 1201.832
+CCTV1_30MINNEWS_CMN_20080328_115902 1201.832 1206.988
+CCTV1_30MINNEWS_CMN_20080328_115902 1235.681 1238.947
+CCTV1_30MINNEWS_CMN_20080328_115902 1243.041 1255.108
+CCTV1_30MINNEWS_CMN_20080328_115902 1255.108 1269.918
+CCTV1_30MINNEWS_CMN_20080328_115902 1269.918 1282.816
+CCTV1_30MINNEWS_CMN_20080328_115902 1295.253 1305.726
+CCTV1_30MINNEWS_CMN_20080328_115902 1305.726 1320.361
+CCTV1_30MINNEWS_CMN_20080328_115902 1320.361 1336.776
+CCTV1_30MINNEWS_CMN_20080328_115902 1371.446 1377.617
+CCTV1_30MINNEWS_CMN_20080328_115902 1377.617 1393.383
+CCTV1_30MINNEWS_CMN_20080328_115902 1393.383 1402.274
+CCTV1_30MINNEWS_CMN_20080328_115902 1402.274 1414.181
+CCTV1_30MINNEWS_CMN_20080328_115902 1414.181 1423.133
+CCTV1_30MINNEWS_CMN_20080328_115902 1430.368 1437.976
+CCTV1_30MINNEWS_CMN_20080328_115902 1437.976 1442.397
+CCTV1_30MINNEWS_CMN_20080328_115902 1458.631 1466.991
+CCTV1_30MINNEWS_CMN_20080328_115902 1466.991 1481.444
+CCTV1_30MINNEWS_CMN_20080328_115902 1481.444 1489.477
+CCTV1_30MINNEWS_CMN_20080328_115902 1489.477 1500.758
+CCTV1_30MINNEWS_CMN_20080328_115902 1500.758 1505.977
+CCTV1_30MINNEWS_CMN_20080328_115902 1505.977 1522.180
+CCTV1_30MINNEWS_CMN_20080328_115902 1522.180 1525.618
+CCTV1_30MINNEWS_CMN_20080328_115902 1525.618 1538.791
+CCTV1_30MINNEWS_CMN_20080328_115902 1538.791 1551.387
+CCTV1_30MINNEWS_CMN_20080328_115902 1559.528 1562.582
+CCTV1_30MINNEWS_CMN_20080328_115902 1562.582 1567.754
+CCTV1_30MINNEWS_CMN_20080328_115902 1567.754 1576.798
+CCTV1_30MINNEWS_CMN_20080328_115902 1580.954 1585.971
+CCTV1_30MINNEWS_CMN_20080328_115902 1589.988 1597.159
+CCTV1_30MINNEWS_CMN_20080328_115902 1597.159 1601.471
+CCTV1_30MINNEWS_CMN_20080328_115902 1601.471 1604.846
+CCTV1_30MINNEWS_CMN_20080328_115902 1617.376 1621.814
+CCTV1_30MINNEWS_CMN_20080328_115902 1842.454 1845.478
+CCTV1_30MINNEWS_CMN_20080328_115902 1846.885 1847.541
+CCTV2_ECON30MIN_CMN_20080406_213518 0.000 1.718
+CCTV2_ECON30MIN_CMN_20080406_213518 1.718 3.422
+CCTV2_ECON30MIN_CMN_20080406_213518 7.172 9.220
+CCTV2_ECON30MIN_CMN_20080406_213518 9.220 13.736
+CCTV2_ECON30MIN_CMN_20080406_213518 13.736 18.621
+CCTV2_ECON30MIN_CMN_20080406_213518 18.621 21.713
+CCTV2_ECON30MIN_CMN_20080406_213518 21.713 26.541
+CCTV2_ECON30MIN_CMN_20080406_213518 28.416 30.427
+CCTV2_ECON30MIN_CMN_20080406_213518 58.388 60.122
+CCTV2_ECON30MIN_CMN_20080406_213518 64.686 70.087
+CCTV2_ECON30MIN_CMN_20080406_213518 72.228 75.150
+CCTV2_ECON30MIN_CMN_20080406_213518 80.948 88.997
+CCTV2_ECON30MIN_CMN_20080406_213518 88.997 95.154
+CCTV2_ECON30MIN_CMN_20080406_213518 95.154 101.013
+CCTV2_ECON30MIN_CMN_20080406_213518 101.013 108.839
+CCTV2_ECON30MIN_CMN_20080406_213518 108.839 118.776
+CCTV2_ECON30MIN_CMN_20080406_213518 118.776 123.338
+CCTV2_ECON30MIN_CMN_20080406_213518 123.338 134.854
+CCTV2_ECON30MIN_CMN_20080406_213518 134.854 139.448
+CCTV2_ECON30MIN_CMN_20080406_213518 139.448 145.113
+CCTV2_ECON30MIN_CMN_20080406_213518 145.113 151.660
+CCTV2_ECON30MIN_CMN_20080406_213518 151.660 155.832
+CCTV2_ECON30MIN_CMN_20080406_213518 155.832 161.833
+CCTV2_ECON30MIN_CMN_20080406_213518 161.833 168.074
+CCTV2_ECON30MIN_CMN_20080406_213518 188.387 194.091
+CCTV2_ECON30MIN_CMN_20080406_213518 194.091 198.701
+CCTV2_ECON30MIN_CMN_20080406_213518 198.701 206.083
+CCTV2_ECON30MIN_CMN_20080406_213518 220.551 225.332
+CCTV2_ECON30MIN_CMN_20080406_213518 225.332 231.768
+CCTV2_ECON30MIN_CMN_20080406_213518 231.768 240.471
+CCTV2_ECON30MIN_CMN_20080406_213518 240.471 249.129
+CCTV2_ECON30MIN_CMN_20080406_213518 255.113 267.580
+CCTV2_ECON30MIN_CMN_20080406_213518 285.363 287.535
+CCTV2_ECON30MIN_CMN_20080406_213518 287.535 293.799
+CCTV2_ECON30MIN_CMN_20080406_213518 293.799 295.956
+CCTV2_ECON30MIN_CMN_20080406_213518 295.956 296.385
+CCTV2_ECON30MIN_CMN_20080406_213518 296.385 298.365
+CCTV2_ECON30MIN_CMN_20080406_213518 298.365 299.968
+CCTV2_ECON30MIN_CMN_20080406_213518 299.968 311.067
+CCTV2_ECON30MIN_CMN_20080406_213518 299.968 311.067
+CCTV2_ECON30MIN_CMN_20080406_213518 323.403 329.823
+CCTV2_ECON30MIN_CMN_20080406_213518 329.823 333.286
+CCTV2_ECON30MIN_CMN_20080406_213518 338.990 341.490
+CCTV2_ECON30MIN_CMN_20080406_213518 341.490 356.420
+CCTV2_ECON30MIN_CMN_20080406_213518 356.420 357.967
+CCTV2_ECON30MIN_CMN_20080406_213518 357.967 365.305
+CCTV2_ECON30MIN_CMN_20080406_213518 365.305 378.789
+CCTV2_ECON30MIN_CMN_20080406_213518 365.305 378.789
+CCTV2_ECON30MIN_CMN_20080406_213518 378.789 386.429
+CCTV2_ECON30MIN_CMN_20080406_213518 386.429 389.367
+CCTV2_ECON30MIN_CMN_20080406_213518 389.367 395.906
+CCTV2_ECON30MIN_CMN_20080406_213518 395.906 397.055
+CCTV2_ECON30MIN_CMN_20080406_213518 397.055 398.774
+CCTV2_ECON30MIN_CMN_20080406_213518 408.787 412.318
+CCTV2_ECON30MIN_CMN_20080406_213518 412.318 413.005
+CCTV2_ECON30MIN_CMN_20080406_213518 413.005 415.518
+CCTV2_ECON30MIN_CMN_20080406_213518 415.518 419.738
+CCTV2_ECON30MIN_CMN_20080406_213518 419.738 429.663
+CCTV2_ECON30MIN_CMN_20080406_213518 429.663 434.426
+CCTV2_ECON30MIN_CMN_20080406_213518 434.426 443.773
+CCTV2_ECON30MIN_CMN_20080406_213518 446.835 457.057
+CCTV2_ECON30MIN_CMN_20080406_213518 457.057 462.323
+CCTV2_ECON30MIN_CMN_20080406_213518 462.323 469.529
+CCTV2_ECON30MIN_CMN_20080406_213518 469.529 472.967
+CCTV2_ECON30MIN_CMN_20080406_213518 479.530 491.496
+CCTV2_ECON30MIN_CMN_20080406_213518 491.496 496.606
+CCTV2_ECON30MIN_CMN_20080406_213518 496.606 502.256
+CCTV2_ECON30MIN_CMN_20080406_213518 502.256 505.790
+CCTV2_ECON30MIN_CMN_20080406_213518 505.790 515.337
+CCTV2_ECON30MIN_CMN_20080406_213518 515.337 521.148
+CCTV2_ECON30MIN_CMN_20080406_213518 523.195 535.383
+CCTV2_ECON30MIN_CMN_20080406_213518 535.383 546.032
+CCTV2_ECON30MIN_CMN_20080406_213518 546.032 551.642
+CCTV2_ECON30MIN_CMN_20080406_213518 551.642 556.986
+CCTV2_ECON30MIN_CMN_20080406_213518 556.986 560.034
+CCTV2_ECON30MIN_CMN_20080406_213518 560.034 565.846
+CCTV2_ECON30MIN_CMN_20080406_213518 565.846 575.988
+CCTV2_ECON30MIN_CMN_20080406_213518 565.846 575.988
+CCTV2_ECON30MIN_CMN_20080406_213518 575.988 582.207
+CCTV2_ECON30MIN_CMN_20080406_213518 590.707 595.290
+CCTV2_ECON30MIN_CMN_20080406_213518 595.290 599.559
+CCTV2_ECON30MIN_CMN_20080406_213518 595.290 599.559
+CCTV2_ECON30MIN_CMN_20080406_213518 599.559 600.394
+CCTV2_ECON30MIN_CMN_20080406_213518 600.394 613.443
+CCTV2_ECON30MIN_CMN_20080406_213518 613.443 617.974
+CCTV2_ECON30MIN_CMN_20080406_213518 613.443 617.974
+CCTV2_ECON30MIN_CMN_20080406_213518 618.677 625.864
+CCTV2_ECON30MIN_CMN_20080406_213518 625.864 631.568
+CCTV2_ECON30MIN_CMN_20080406_213518 625.864 631.568
+CCTV2_ECON30MIN_CMN_20080406_213518 631.568 635.333
+CCTV2_ECON30MIN_CMN_20080406_213518 635.333 638.583
+CCTV2_ECON30MIN_CMN_20080406_213518 638.583 647.041
+CCTV2_ECON30MIN_CMN_20080406_213518 653.089 655.154
+CCTV2_ECON30MIN_CMN_20080406_213518 655.154 655.779
+CCTV2_ECON30MIN_CMN_20080406_213518 655.779 657.685
+CCTV2_ECON30MIN_CMN_20080406_213518 657.685 658.974
+CCTV2_ECON30MIN_CMN_20080406_213518 658.974 660.568
+CCTV2_ECON30MIN_CMN_20080406_213518 660.568 663.224
+CCTV2_ECON30MIN_CMN_20080406_213518 663.224 663.927
+CCTV2_ECON30MIN_CMN_20080406_213518 663.927 664.522
+CCTV2_ECON30MIN_CMN_20080406_213518 664.522 665.319
+CCTV2_ECON30MIN_CMN_20080406_213518 665.319 665.819
+CCTV2_ECON30MIN_CMN_20080406_213518 665.819 668.867
+CCTV2_ECON30MIN_CMN_20080406_213518 668.867 670.429
+CCTV2_ECON30MIN_CMN_20080406_213518 670.429 671.841
+CCTV2_ECON30MIN_CMN_20080406_213518 671.841 674.435
+CCTV2_ECON30MIN_CMN_20080406_213518 674.435 675.185
+CCTV2_ECON30MIN_CMN_20080406_213518 675.185 675.828
+CCTV2_ECON30MIN_CMN_20080406_213518 675.828 676.484
+CCTV2_ECON30MIN_CMN_20080406_213518 676.484 679.812
+CCTV2_ECON30MIN_CMN_20080406_213518 679.812 682.875
+CCTV2_ECON30MIN_CMN_20080406_213518 687.361 688.937
+CCTV2_ECON30MIN_CMN_20080406_213518 688.937 690.010
+CCTV2_ECON30MIN_CMN_20080406_213518 690.010 693.164
+CCTV2_ECON30MIN_CMN_20080406_213518 693.164 706.206
+CCTV2_ECON30MIN_CMN_20080406_213518 706.206 710.581
+CCTV2_ECON30MIN_CMN_20080406_213518 716.791 720.277
+CCTV2_ECON30MIN_CMN_20080406_213518 720.277 722.683
+CCTV2_ECON30MIN_CMN_20080406_213518 734.568 739.879
+CCTV2_ECON30MIN_CMN_20080406_213518 739.879 745.467
+CCTV2_ECON30MIN_CMN_20080406_213518 739.879 745.467
+CCTV2_ECON30MIN_CMN_20080406_213518 745.467 753.304
+CCTV2_ECON30MIN_CMN_20080406_213518 753.304 754.304
+CCTV2_ECON30MIN_CMN_20080406_213518 754.304 757.335
+CCTV2_ECON30MIN_CMN_20080406_213518 757.335 757.898
+CCTV2_ECON30MIN_CMN_20080406_213518 757.898 758.570
+CCTV2_ECON30MIN_CMN_20080406_213518 758.570 761.897
+CCTV2_ECON30MIN_CMN_20080406_213518 758.570 761.897
+CCTV2_ECON30MIN_CMN_20080406_213518 761.897 763.366
+CCTV2_ECON30MIN_CMN_20080406_213518 761.897 763.366
+CCTV2_ECON30MIN_CMN_20080406_213518 763.366 771.569
+CCTV2_ECON30MIN_CMN_20080406_213518 763.366 771.569
+CCTV2_ECON30MIN_CMN_20080406_213518 771.569 773.449
+CCTV2_ECON30MIN_CMN_20080406_213518 774.709 776.147
+CCTV2_ECON30MIN_CMN_20080406_213518 779.302 785.484
+CCTV2_ECON30MIN_CMN_20080406_213518 785.484 787.359
+CCTV2_ECON30MIN_CMN_20080406_213518 787.359 800.776
+CCTV2_ECON30MIN_CMN_20080406_213518 787.359 800.776
+CCTV2_ECON30MIN_CMN_20080406_213518 800.776 809.165
+CCTV2_ECON30MIN_CMN_20080406_213518 800.776 809.165
+CCTV2_ECON30MIN_CMN_20080406_213518 809.165 813.785
+CCTV2_ECON30MIN_CMN_20080406_213518 809.165 813.785
+CCTV2_ECON30MIN_CMN_20080406_213518 813.785 817.873
+CCTV2_ECON30MIN_CMN_20080406_213518 817.873 823.504
+CCTV2_ECON30MIN_CMN_20080406_213518 823.504 824.660
+CCTV2_ECON30MIN_CMN_20080406_213518 824.660 829.099
+CCTV2_ECON30MIN_CMN_20080406_213518 829.099 834.832
+CCTV2_ECON30MIN_CMN_20080406_213518 834.832 841.337
+CCTV2_ECON30MIN_CMN_20080406_213518 848.777 850.261
+CCTV2_ECON30MIN_CMN_20080406_213518 854.333 858.077
+CCTV2_ECON30MIN_CMN_20080406_213518 858.077 861.202
+CCTV2_ECON30MIN_CMN_20080406_213518 861.202 862.342
+CCTV2_ECON30MIN_CMN_20080406_213518 862.342 864.991
+CCTV2_ECON30MIN_CMN_20080406_213518 864.991 870.084
+CCTV2_ECON30MIN_CMN_20080406_213518 870.084 876.553
+CCTV2_ECON30MIN_CMN_20080406_213518 876.553 881.607
+CCTV2_ECON30MIN_CMN_20080406_213518 889.763 895.419
+CCTV2_ECON30MIN_CMN_20080406_213518 921.337 931.964
+CCTV2_ECON30MIN_CMN_20080406_213518 931.964 937.211
+CCTV2_ECON30MIN_CMN_20080406_213518 937.211 942.574
+CCTV2_ECON30MIN_CMN_20080406_213518 942.574 949.276
+CCTV2_ECON30MIN_CMN_20080406_213518 949.276 953.074
+CCTV2_ECON30MIN_CMN_20080406_213518 953.074 957.561
+CCTV2_ECON30MIN_CMN_20080406_213518 975.252 979.770
+CCTV2_ECON30MIN_CMN_20080406_213518 979.770 985.552
+CCTV2_ECON30MIN_CMN_20080406_213518 985.552 991.895
+CCTV2_ECON30MIN_CMN_20080406_213518 991.895 998.228
+CCTV2_ECON30MIN_CMN_20080406_213518 1008.139 1009.826
+CCTV2_ECON30MIN_CMN_20080406_213518 1009.826 1011.999
+CCTV2_ECON30MIN_CMN_20080406_213518 1011.999 1017.897
+CCTV2_ECON30MIN_CMN_20080406_213518 1017.897 1027.272
+CCTV2_ECON30MIN_CMN_20080406_213518 1027.272 1033.178
+CCTV2_ECON30MIN_CMN_20080406_213518 1047.754 1049.020
+CCTV2_ECON30MIN_CMN_20080406_213518 1053.755 1054.364
+CCTV2_ECON30MIN_CMN_20080406_213518 1062.989 1063.864
+CCTV2_ECON30MIN_CMN_20080406_213518 1065.694 1067.037
+CCTV2_ECON30MIN_CMN_20080406_213518 1069.600 1074.350
+CCTV2_ECON30MIN_CMN_20080406_213518 1091.039 1098.434
+CCTV2_ECON30MIN_CMN_20080406_213518 1106.726 1108.841
+CCTV2_ECON30MIN_CMN_20080406_213518 1108.841 1110.332
+CCTV2_ECON30MIN_CMN_20080406_213518 1120.191 1136.095
+CCTV2_ECON30MIN_CMN_20080406_213518 1136.095 1137.845
+CCTV2_ECON30MIN_CMN_20080406_213518 1137.845 1144.741
+CCTV2_ECON30MIN_CMN_20080406_213518 1137.845 1144.741
+CCTV2_ECON30MIN_CMN_20080406_213518 1144.741 1146.882
+CCTV2_ECON30MIN_CMN_20080406_213518 1146.882 1148.077
+CCTV2_ECON30MIN_CMN_20080406_213518 1148.077 1149.792
+CCTV2_ECON30MIN_CMN_20080406_213518 1150.956 1152.582
+CCTV2_ECON30MIN_CMN_20080406_213518 1152.582 1156.084
+CCTV2_ECON30MIN_CMN_20080406_213518 1152.582 1156.084
+CCTV2_ECON30MIN_CMN_20080406_213518 1156.084 1161.273
+CCTV2_ECON30MIN_CMN_20080406_213518 1161.273 1167.321
+CCTV2_ECON30MIN_CMN_20080406_213518 1161.273 1167.321
+CCTV2_ECON30MIN_CMN_20080406_213518 1167.321 1171.545
+CCTV2_ECON30MIN_CMN_20080406_213518 1171.545 1172.560
+CCTV2_ECON30MIN_CMN_20080406_213518 1172.560 1180.161
+CCTV2_ECON30MIN_CMN_20080406_213518 1187.631 1188.116
+CCTV2_ECON30MIN_CMN_20080406_213518 1188.116 1190.960
+CCTV2_ECON30MIN_CMN_20080406_213518 1190.960 1198.872
+CCTV2_ECON30MIN_CMN_20080406_213518 1198.872 1210.319
+CCTV2_ECON30MIN_CMN_20080406_213518 1210.319 1214.349
+CCTV2_ECON30MIN_CMN_20080406_213518 1214.349 1219.567
+CCTV2_ECON30MIN_CMN_20080406_213518 1219.567 1224.893
+CCTV2_ECON30MIN_CMN_20080406_213518 1224.893 1225.454
+CCTV2_ECON30MIN_CMN_20080406_213518 1225.454 1234.501
+CCTV2_ECON30MIN_CMN_20080406_213518 1234.501 1236.341
+CCTV2_ECON30MIN_CMN_20080406_213518 1234.501 1236.341
+CCTV2_ECON30MIN_CMN_20080406_213518 1236.341 1237.451
+CCTV2_ECON30MIN_CMN_20080406_213518 1237.451 1240.074
+CCTV2_ECON30MIN_CMN_20080406_213518 1240.074 1243.383
+CCTV2_ECON30MIN_CMN_20080406_213518 1243.383 1247.602
+CCTV2_ECON30MIN_CMN_20080406_213518 1247.602 1248.871
+CCTV2_ECON30MIN_CMN_20080406_213518 1248.871 1249.965
+CCTV2_ECON30MIN_CMN_20080406_213518 1249.965 1257.276
+CCTV2_ECON30MIN_CMN_20080406_213518 1257.276 1272.249
+CCTV2_ECON30MIN_CMN_20080406_213518 1257.276 1272.249
+CCTV2_ECON30MIN_CMN_20080406_213518 1272.249 1274.870
+CCTV2_ECON30MIN_CMN_20080406_213518 1272.249 1274.870
+CCTV2_ECON30MIN_CMN_20080406_213518 1274.870 1276.069
+CCTV2_ECON30MIN_CMN_20080406_213518 1276.069 1288.963
+CCTV2_ECON30MIN_CMN_20080406_213518 1276.069 1288.963
+CCTV2_ECON30MIN_CMN_20080406_213518 1288.963 1295.867
+CCTV2_ECON30MIN_CMN_20080406_213518 1288.963 1295.867
+CCTV2_ECON30MIN_CMN_20080406_213518 1295.867 1304.306
+CCTV2_ECON30MIN_CMN_20080406_213518 1314.324 1327.147
+CCTV2_ECON30MIN_CMN_20080406_213518 1327.147 1334.457
+CCTV2_ECON30MIN_CMN_20080406_213518 1334.457 1343.164
+CCTV2_ECON30MIN_CMN_20080406_213518 1343.164 1348.476
+CCTV2_ECON30MIN_CMN_20080406_213518 1348.476 1349.914
+CCTV2_ECON30MIN_CMN_20080406_213518 1349.914 1350.805
+CCTV2_ECON30MIN_CMN_20080406_213518 1350.805 1359.195
+CCTV2_ECON30MIN_CMN_20080406_213518 1359.195 1369.992
+CCTV2_ECON30MIN_CMN_20080406_213518 1369.992 1380.569
+CCTV2_ECON30MIN_CMN_20080406_213518 1380.569 1390.097
+CCTV2_ECON30MIN_CMN_20080406_213518 1390.659 1397.930
+CCTV2_ECON30MIN_CMN_20080406_213518 1397.930 1406.703
+CCTV2_ECON30MIN_CMN_20080406_213518 1406.703 1407.265
+CCTV2_ECON30MIN_CMN_20080406_213518 1407.265 1415.593
+CCTV2_ECON30MIN_CMN_20080406_213518 1415.593 1420.289
+CCTV2_ECON30MIN_CMN_20080406_213518 1420.289 1423.805
+CCTV2_ECON30MIN_CMN_20080406_213518 1423.805 1432.557
+CCTV2_ECON30MIN_CMN_20080406_213518 1432.557 1439.526
+CCTV2_ECON30MIN_CMN_20080406_213518 1439.526 1450.977
+CCTV2_ECON30MIN_CMN_20080406_213518 1439.526 1450.977
+CCTV2_ECON30MIN_CMN_20080406_213518 1450.977 1457.269
+CCTV2_ECON30MIN_CMN_20080406_213518 1457.269 1457.878
+CCTV2_ECON30MIN_CMN_20080406_213518 1457.878 1462.424
+CCTV2_ECON30MIN_CMN_20080406_213518 1462.424 1465.221
+CCTV2_ECON30MIN_CMN_20080406_213518 1465.221 1467.455
+CCTV2_ECON30MIN_CMN_20080406_213518 1465.221 1467.455
+CCTV2_ECON30MIN_CMN_20080406_213518 1467.455 1470.316
+CCTV2_ECON30MIN_CMN_20080406_213518 1470.316 1471.863
+CCTV2_ECON30MIN_CMN_20080406_213518 1471.863 1472.848
+CCTV2_ECON30MIN_CMN_20080406_213518 1472.848 1474.017
+CCTV2_ECON30MIN_CMN_20080406_213518 1474.017 1475.477
+CCTV2_ECON30MIN_CMN_20080406_213518 1475.477 1478.346
+CCTV2_ECON30MIN_CMN_20080406_213518 1478.346 1480.534
+CCTV2_ECON30MIN_CMN_20080406_213518 1480.534 1483.838
+CCTV2_ECON30MIN_CMN_20080406_213518 1480.534 1483.838
+CCTV2_ECON30MIN_CMN_20080406_213518 1483.838 1487.126
+CCTV2_ECON30MIN_CMN_20080406_213518 1487.126 1491.150
+CCTV2_ECON30MIN_CMN_20080406_213518 1491.150 1492.237
+CCTV2_ECON30MIN_CMN_20080406_213518 1492.237 1494.518
+CCTV2_ECON30MIN_CMN_20080406_213518 1492.237 1494.518
+CCTV2_ECON30MIN_CMN_20080406_213518 1494.518 1497.971
+CCTV2_ECON30MIN_CMN_20080406_213518 1497.971 1503.034
+CCTV2_ECON30MIN_CMN_20080406_213518 1503.034 1506.409
+CCTV2_ECON30MIN_CMN_20080406_213518 1506.409 1508.689
+CCTV2_ECON30MIN_CMN_20080406_213518 1508.689 1516.722
+CCTV2_ECON30MIN_CMN_20080406_213518 1516.722 1522.285
+CCTV2_ECON30MIN_CMN_20080406_213518 1530.286 1541.613
+CCTV2_ECON30MIN_CMN_20080406_213518 1541.613 1548.549
+CCTV2_ECON30MIN_CMN_20080406_213518 1554.427 1558.110
+CCTV2_ECON30MIN_CMN_20080406_213518 1558.110 1578.287
+CCTV2_ECON30MIN_CMN_20080406_213518 1578.287 1588.944
+CCTV2_ECON30MIN_CMN_20080406_213518 1588.944 1595.663
+CCTV2_ECON30MIN_CMN_20080406_213518 1601.929 1609.599
+CCTV2_ECON30MIN_CMN_20080406_213518 1609.599 1616.022
+CCTV2_ECON30MIN_CMN_20080406_213518 1616.022 1619.678
+CCTV2_ECON30MIN_CMN_20080406_213518 1619.678 1633.052
+CCTV2_ECON30MIN_CMN_20080406_213518 1645.770 1652.644
+CCTV2_ECON30MIN_CMN_20080406_213518 1652.644 1659.737
+CCTV2_ECON30MIN_CMN_20080406_213518 1659.737 1660.381
+CCTV2_ECON30MIN_CMN_20080406_213518 1660.381 1664.506
+CCTV2_ECON30MIN_CMN_20080406_213518 1664.506 1666.521
+CCTV2_ECON30MIN_CMN_20080406_213518 1666.521 1671.523
+CCTV2_ECON30MIN_CMN_20080406_213518 1666.521 1671.523
+CCTV2_ECON30MIN_CMN_20080406_213518 1671.523 1671.914
+CCTV2_ECON30MIN_CMN_20080406_213518 1671.914 1674.270
+CCTV2_ECON30MIN_CMN_20080406_213518 1674.270 1675.489
+CCTV2_ECON30MIN_CMN_20080406_213518 1675.489 1677.853
+CCTV2_ECON30MIN_CMN_20080406_213518 1677.853 1679.915
+CCTV2_ECON30MIN_CMN_20080406_213518 1679.915 1682.118
+CCTV2_ECON30MIN_CMN_20080406_213518 1682.118 1684.204
+CCTV2_ECON30MIN_CMN_20080406_213518 1684.204 1689.958
+CCTV2_ECON30MIN_CMN_20080406_213518 1689.958 1697.192
+CCTV2_ECON30MIN_CMN_20080406_213518 1697.192 1700.601
+CCTV2_ECON30MIN_CMN_20080406_213518 1700.601 1702.054
+CCTV2_ECON30MIN_CMN_20080406_213518 1702.054 1704.428
+CCTV2_ECON30MIN_CMN_20080406_213518 1704.428 1705.630
+CCTV2_ECON30MIN_CMN_20080406_213518 1705.630 1707.255
+CCTV2_ECON30MIN_CMN_20080406_213518 1707.255 1709.552
+CCTV2_ECON30MIN_CMN_20080406_213518 1709.552 1713.215
+CCTV2_ECON30MIN_CMN_20080406_213518 1713.215 1714.262
+CCTV2_ECON30MIN_CMN_20080406_213518 1714.262 1715.965
+CCTV2_ECON30MIN_CMN_20080406_213518 1715.965 1717.345
+CCTV2_ECON30MIN_CMN_20080406_213518 1717.345 1718.954
+CCTV2_ECON30MIN_CMN_20080406_213518 1718.954 1719.673
+CCTV2_ECON30MIN_CMN_20080406_213518 1719.673 1721.314
+CCTV2_ECON30MIN_CMN_20080406_213518 1721.314 1721.892
+CCTV2_ECON30MIN_CMN_20080406_213518 1721.892 1726.377
+CCTV2_ECON30MIN_CMN_20080406_213518 1726.377 1730.487
+CCTV2_ECON30MIN_CMN_20080406_213518 1730.487 1735.972
+CCTV2_ECON30MIN_CMN_20080406_213518 1735.972 1737.754
+CCTV2_ECON30MIN_CMN_20080406_213518 1737.754 1740.692
+CCTV2_ECON30MIN_CMN_20080406_213518 1740.692 1741.880
+CCTV2_ECON30MIN_CMN_20080406_213518 1741.880 1746.364
+CCTV2_ECON30MIN_CMN_20080406_213518 1747.457 1749.332
+CCTV2_ECON30MIN_CMN_20080406_213518 1751.207 1752.570
+CCTV2_ECON30MIN_CMN_20080406_213518 1752.570 1753.305
+CCTV2_ECON30MIN_CMN_20080406_213518 1753.305 1754.116
+CCTV2_ECON30MIN_CMN_20080406_213518 1754.116 1757.837
+CCTV2_ECON30MIN_CMN_20080406_213518 1757.837 1759.275
+CCTV2_ECON30MIN_CMN_20080406_213518 1759.275 1760.275
+CCTV2_ECON30MIN_CMN_20080406_213518 1760.275 1763.806
+CCTV2_ECON30MIN_CMN_20080406_213518 1763.806 1766.695
+CCTV2_ECON30MIN_CMN_20080406_213518 1766.695 1770.343
+CCTV2_ECON30MIN_CMN_20080406_213518 1770.343 1774.680
+CCTV2_ECON30MIN_CMN_20080406_213518 1774.680 1782.383
+CCTV2_ECON30MIN_CMN_20080406_213518 1788.978 1796.969
+CCTV2_ECON30MIN_CMN_20080406_213518 1796.969 1798.631
+CCTV2_ECON30MIN_CMN_20080406_213518 1798.631 1804.966
+CCTV2_ECON30MIN_CMN_20080406_213518 1804.966 1808.600
+CCTV2_ECON30MIN_CMN_20080406_213518 1808.600 1811.156
+CCTV2_ECON30MIN_CMN_20080406_213518 1811.156 1816.373
+CCTV2_ECON30MIN_CMN_20080406_213518 1932.405 1938.811
+CCTV2_ECON30MIN_CMN_20080406_213518 1938.811 1945.761
+CCTV2_ECON30MIN_CMN_20080406_213518 1952.047 1955.407
+CCTV2_ECON30MIN_CMN_20080406_213518 1955.407 1962.562
+CCTV2_NEWSLIST_CMN_20080407_114902 190.964 206.081
+CCTV2_NEWSLIST_CMN_20080407_114902 206.081 231.442
+CCTV2_NEWSLIST_CMN_20080407_114902 293.868 302.892
+CCTV2_NEWSLIST_CMN_20080407_114902 377.067 402.330
+CCTV2_NEWSLIST_CMN_20080407_114902 402.330 428.120
+CCTV2_NEWSLIST_CMN_20080407_114902 444.461 465.699
+CCTV2_NEWSLIST_CMN_20080407_114902 501.102 518.374
+CCTV2_NEWSLIST_CMN_20080407_114902 518.374 533.099
+CCTV2_NEWSLIST_CMN_20080407_114902 603.572 619.857
+CCTV2_NEWSLIST_CMN_20080407_114902 626.844 634.449
+CCTV2_NEWSLIST_CMN_20080407_114902 634.449 639.293
+CCTV2_NEWSLIST_CMN_20080407_114902 639.293 652.670
+CCTV2_NEWSLIST_CMN_20080407_114902 666.215 684.965
+CCTV2_NEWSLIST_CMN_20080407_114902 684.965 704.091
+CCTV2_NEWSLIST_CMN_20080407_114902 704.091 718.755
+CCTV2_NEWSLIST_CMN_20080407_114902 718.755 734.741
+CCTV2_NEWSLIST_CMN_20080407_114902 734.741 744.705
+CCTV2_NEWSLIST_CMN_20080407_114902 744.705 769.831
+CCTV2_NEWSLIST_CMN_20080407_114902 769.831 783.963
+CCTV2_NEWSLIST_CMN_20080407_114902 808.238 814.887
+CCTV2_NEWSLIST_CMN_20080407_114902 824.133 835.744
+CCTV2_NEWSLIST_CMN_20080407_114902 835.744 840.588
+CCTV2_NEWSLIST_CMN_20080407_114902 849.630 867.651
+CCTV2_NEWSLIST_CMN_20080407_114902 867.651 877.591
+CCTV2_NEWSLIST_CMN_20080407_114902 914.744 929.296
+CCTV2_NEWSLIST_CMN_20080407_114902 929.296 940.488
+CCTV2_NEWSLIST_CMN_20080407_114902 955.102 961.114
+CCTV2_NEWSLIST_CMN_20080407_114902 961.114 977.306
+CCTV2_NEWSLIST_CMN_20080407_114902 977.306 993.617
+CCTV2_NEWSLIST_CMN_20080407_114902 993.617 1005.231
+CCTV2_NEWSLIST_CMN_20080407_114902 1005.231 1025.242
+CCTV2_NEWSLIST_CMN_20080407_114902 1025.242 1042.108
+CCTV2_NEWSLIST_CMN_20080407_114902 1042.108 1050.797
+CCTV2_NEWSLIST_CMN_20080407_114902 1050.797 1065.704
+CCTV2_NEWSLIST_CMN_20080407_114902 1065.704 1080.824
+CCTV2_NEWSLIST_CMN_20080407_114902 1080.824 1094.970
+CCTV2_NEWSLIST_CMN_20080407_114902 1101.267 1106.354
+CCTV2_NEWSLIST_CMN_20080407_114902 1138.680 1147.303
+CCTV2_NEWSLIST_CMN_20080407_114902 1169.665 1172.749
+CCTV2_NEWSLIST_CMN_20080407_114902 1199.363 1220.498
+CCTV2_NEWSLIST_CMN_20080407_114902 1220.498 1233.037
+CCTV2_NEWSLIST_CMN_20080407_114902 1233.037 1245.396
+CCTV2_NEWSLIST_CMN_20080407_114902 1259.169 1263.025
+CCTV2_NEWSLIST_CMN_20080407_114902 1315.415 1324.866
+CCTV2_NEWSLIST_CMN_20080407_114902 1324.866 1343.299
+CCTV2_NEWSLIST_CMN_20080407_114902 1356.988 1371.982
+CCTV2_NEWSLIST_CMN_20080407_114902 1402.039 1415.326
+CCTV2_NEWSLIST_CMN_20080407_114902 1415.326 1418.171
+CCTV2_NEWSLIST_CMN_20080407_114902 1418.171 1442.830
+CCTV2_NEWSLIST_CMN_20080407_114902 1483.270 1496.025
+CCTV2_NEWSLIST_CMN_20080407_114902 1504.702 1522.347
+CCTV2_NEWSLIST_CMN_20080407_114902 1522.347 1538.256
+CCTV2_NEWSLIST_CMN_20080407_114902 1570.672 1585.552
+CCTV2_NEWSLIST_CMN_20080407_114902 1629.706 1653.096
+CCTV2_NEWSLIST_CMN_20080407_114902 1653.096 1672.960
+CCTV2_NEWSLIST_CMN_20080407_114902 1672.960 1685.742
+CCTV2_NEWSLIST_CMN_20080407_114902 1711.517 1729.122
+CCTV2_NEWSLIST_CMN_20080407_114902 1729.122 1750.050
+CCTV2_NEWSLIST_CMN_20080407_114902 1790.849 1817.194
+CCTV2_NEWSLIST_CMN_20080407_114902 1850.350 1864.369
+CCTV2_NEWSLIST_CMN_20080407_114902 1902.717 1923.130
+CCTV2_NEWSLIST_CMN_20080407_114902 1979.999 1991.700
+CCTV2_NEWSLIST_CMN_20080407_114902 2008.520 2021.212
+CCTV2_NEWSLIST_CMN_20080407_114902 2057.251 2062.904
+CCTV2_NEWSLIST_CMN_20080407_114902 2090.204 2105.557
+CCTV2_NEWSLIST_CMN_20080407_114902 2105.557 2114.862
+CCTV2_NEWSLIST_CMN_20080407_114902 2114.862 2131.807
+CCTV2_NEWSLIST_CMN_20080407_114902 2131.807 2143.035
+CCTV2_NEWSLIST_CMN_20080407_114902 2168.478 2178.148
+CCTV2_NEWSLIST_CMN_20080407_114902 2267.472 2279.981
+CCTV2_NEWSLIST_CMN_20080407_114902 2310.269 2322.337
+CCTV2_NEWSLIST_CMN_20080407_114902 2337.035 2354.949
+CCTV2_NEWSLIST_CMN_20080407_114902 2354.949 2366.517
+CCTV2_NEWSLIST_CMN_20080416_114902 136.569 157.486
+CCTV2_NEWSLIST_CMN_20080416_114902 157.486 171.941
+CCTV2_NEWSLIST_CMN_20080416_114902 203.415 211.459
+CCTV2_NEWSLIST_CMN_20080416_114902 236.208 252.317
+CCTV2_NEWSLIST_CMN_20080416_114902 285.120 302.659
+CCTV2_NEWSLIST_CMN_20080416_114902 346.830 356.405
+CCTV2_NEWSLIST_CMN_20080416_114902 356.405 374.393
+CCTV2_NEWSLIST_CMN_20080416_114902 395.198 407.444
+CCTV2_NEWSLIST_CMN_20080416_114902 424.983 442.582
+CCTV2_NEWSLIST_CMN_20080416_114902 442.582 453.295
+CCTV2_NEWSLIST_CMN_20080416_114902 561.461 575.647
+CCTV2_NEWSLIST_CMN_20080416_114902 575.647 587.916
+CCTV2_NEWSLIST_CMN_20080416_114902 628.343 639.624
+CCTV2_NEWSLIST_CMN_20080416_114902 704.371 719.994
+CCTV2_NEWSLIST_CMN_20080416_114902 884.158 899.662
+CCTV2_NEWSLIST_CMN_20080416_114902 924.868 938.455
+CCTV2_NEWSLIST_CMN_20080416_114902 938.455 956.416
+CCTV2_NEWSLIST_CMN_20080416_114902 970.836 976.225
+CCTV2_NEWSLIST_CMN_20080416_114902 976.225 980.905
+CCTV2_NEWSLIST_CMN_20080416_114902 1010.567 1024.643
+CCTV2_NEWSLIST_CMN_20080416_114902 1024.643 1042.937
+CCTV2_NEWSLIST_CMN_20080416_114902 1057.868 1075.197
+CCTV2_NEWSLIST_CMN_20080416_114902 1075.197 1090.614
+CCTV2_NEWSLIST_CMN_20080416_114902 1151.810 1164.080
+CCTV2_NEWSLIST_CMN_20080416_114902 1164.080 1167.218
+CCTV2_NEWSLIST_CMN_20080416_114902 1167.218 1188.853
+CCTV2_NEWSLIST_CMN_20080416_114902 1218.219 1231.686
+CCTV2_NEWSLIST_CMN_20080416_114902 1231.686 1252.859
+CCTV2_NEWSLIST_CMN_20080416_114902 1252.859 1256.506
+CCTV2_NEWSLIST_CMN_20080416_114902 1356.073 1368.073
+CCTV2_NEWSLIST_CMN_20080416_114902 1375.798 1394.564
+CCTV2_NEWSLIST_CMN_20080416_114902 1394.564 1413.738
+CCTV2_NEWSLIST_CMN_20080416_114902 1413.738 1425.539
+CCTV2_NEWSLIST_CMN_20080416_114902 1425.539 1428.713
+CCTV2_NEWSLIST_CMN_20080416_114902 1428.713 1447.270
+CCTV2_NEWSLIST_CMN_20080416_114902 1447.270 1464.119
+CCTV2_NEWSLIST_CMN_20080416_114902 1464.119 1492.141
+CCTV2_NEWSLIST_CMN_20080416_114902 1492.141 1503.366
+CCTV2_NEWSLIST_CMN_20080416_114902 1523.247 1545.067
+CCTV2_NEWSLIST_CMN_20080416_114902 1545.067 1557.549
+CCTV2_NEWSLIST_CMN_20080416_114902 1557.549 1578.087
+CCTV2_NEWSLIST_CMN_20080416_114902 1578.087 1606.821
+CCTV2_NEWSLIST_CMN_20080416_114902 1606.821 1628.438
+CCTV2_NEWSLIST_CMN_20080416_114902 1628.438 1639.360
+CCTV2_NEWSLIST_CMN_20080416_114902 1639.360 1642.773
+CCTV2_NEWSLIST_CMN_20080416_114902 1642.773 1661.213
+CCTV2_NEWSLIST_CMN_20080416_114902 1683.387 1697.079
+CCTV2_NEWSLIST_CMN_20080416_114902 1697.079 1714.468
+CCTV2_NEWSLIST_CMN_20080416_114902 1804.873 1816.843
+CCTV2_NEWSLIST_CMN_20080416_114902 1816.843 1827.322
+CCTV2_NEWSLIST_CMN_20080416_114902 1827.322 1842.289
+CCTV2_NEWSLIST_CMN_20080416_114902 1862.795 1876.957
+CCTV2_NEWSLIST_CMN_20080416_114902 1876.957 1885.155
+CCTV2_NEWSLIST_CMN_20080416_114902 1900.104 1912.100
+CCTV2_NEWSLIST_CMN_20080416_114902 1930.933 1937.460
+CCTV2_NEWSLIST_CMN_20080416_114902 1937.460 1948.838
+CCTV2_NEWSLIST_CMN_20080416_114902 1948.838 1964.288
+CCTV2_NEWSLIST_CMN_20080416_114902 1989.252 1999.437
+CCTV2_NEWSLIST_CMN_20080416_114902 1999.437 2012.964
+CCTV2_NEWSLIST_CMN_20080416_114902 2044.868 2060.413
+CCTV2_NEWSLIST_CMN_20080416_114902 2090.254 2100.015
+CCTV2_NEWSLIST_CMN_20080416_114902 2100.015 2116.180
+CCTV2_NEWSLIST_CMN_20080416_114902 2141.813 2153.639
+CCTV2_NEWSLIST_CMN_20080416_114902 2153.639 2166.783
+CCTV2_NEWSLIST_CMN_20080416_114902 2215.999 2229.077
+CCTV2_NEWSLIST_CMN_20080416_114902 2273.986 2283.360
diff --git a/egs/mandarin_bn_bc/s5/local/gigaword_prep_txt.sh b/egs/mandarin_bn_bc/s5/local/gigaword_prep_txt.sh
new file mode 100644
index 00000000000..5359325bd9d
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gigaword_prep_txt.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+if [ $# != 2 ]; then
+ echo "Usage: $0 <gigaword-dir> <gigaword-lang-dir>"
+ exit 1;
+fi
+
+giga_dir=$1
+giga_lang_dir=$2
+
+[ ! -d $giga_lang_dir ] && mkdir -p $giga_lang_dir;
+
+find $giga_dir -name "*.gz" > $giga_lang_dir/giga_trans.flist || { echo "Failed to find files"; exit 1; }
+
+if [ `wc -l $giga_lang_dir/giga_trans.flist | cut -d " " -f1` == 0 ]; then
+ echo "Empty file list : $giga_lang_dir/giga_trans.flist"
+ exit 1;
+fi
+
+for f in `cat $giga_lang_dir/giga_trans.flist`
+do
+ fname=$(basename "$f" ".gz")
+ gunzip -c $f | \
+ python3 local/gigaword_text_parse.py > $giga_lang_dir/$fname.tmp.txt
+done
+
+cat $giga_lang_dir/*.tmp.txt > $giga_lang_dir/raw.text
+rm $giga_lang_dir/*.tmp.txt
+
+pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'`
+export PYTHONPATH=$PYTHONPATH:`pwd`/tools/mmseg-1.3.0/lib/python${pyver}/site-packages
+if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "--- Downloading mmseg-1.3.0 ..."
+ echo "NOTE: it assumes that you have Python, Setuptools installed on your system!"
+ wget -P tools http://pypi.python.org/packages/source/m/mmseg/mmseg-1.3.0.tar.gz
+ tar xf tools/mmseg-1.3.0.tar.gz -C tools
+ cd tools/mmseg-1.3.0
+ mkdir -p lib/python${pyver}/site-packages
+ CC=gcc CXX=g++ python setup.py build
+ python setup.py install --prefix=.
+ cd ../..
+ if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "mmseg is not found - installation failed?"
+ exit 1
+ fi
+fi
+cat $giga_lang_dir/raw.text |\
+ perl local/mandarin_text_normalize.pl |\
+ python local/mandarin_segment.py > $giga_lang_dir/filtered.text
+cat $giga_lang_dir/filtered.text |\
+ python local/mandarin_segment.py > $giga_lang_dir/segmented.text
+mv $giga_lang_dir/segmented.text $giga_lang_dir/text
diff --git a/egs/mandarin_bn_bc/s5/local/gigaword_text_parse.py b/egs/mandarin_bn_bc/s5/local/gigaword_text_parse.py
new file mode 100644
index 00000000000..e2d7ca4bb6b
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/gigaword_text_parse.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+
+# Apache 2.0
+
+from __future__ import print_function
+import io
+import sys
+
+if __name__ == '__main__':
+ input_stream = io.TextIOWrapper(sys.stdin.buffer,encoding='utf-8')
+ anker = False
+ for line in input_stream.readlines():
+ line = line.strip()
+ if line == "<P>":
+ anker = True
+ continue
+ elif line == "</P>":
+ anker = False
+ elif anker:
+ print(line)
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_format_lms.sh b/egs/mandarin_bn_bc/s5/local/mandarin_format_lms.sh
new file mode 100755
index 00000000000..728d2180b70
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_format_lms.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+if [ -f path.sh ]; then
+ . ./path.sh; else
+ echo "missing path.sh"; exit 1;
+fi
+
+if [ $# -ne 3 ]; then
+ echo "Usage: $0 <arpa-lm> <src-lang-dir> <tgt-lang-dir>"
+ echo "E.g., $0 data/local/lm/srilm.o4g.kn.gz data/lang data/lang_test"
+ exit 1
+fi
+
+arpa_lm=$1
+src_dir=$2
+tgt_dir=$3
+
+
+set -e -o pipefail
+set -x
+
+export LC_ALL=C
+
+#arpa_lm=data/local/gale/train/lm_4gram/srilm.o4g.kn.gz
+
+[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
+
+rm -r $tgt_dir || true
+cp -r $src_dir $tgt_dir
+
+gunzip -c "$arpa_lm" | \
+ arpa2fst --disambig-symbol=#0 \
+ --read-symbol-table=$tgt_dir/words.txt - $tgt_dir/G.fst
+
+
+echo "Checking how stochastic G is (the first of these numbers should be small):"
+fstisstochastic $tgt_dir/G.fst || true
+
+## Check lexicon.
+## just have a look and make sure it seems sane.
+echo "First few lines of lexicon FST:"
+(
+ fstprint --isymbols=$src_dir/phones.txt --osymbols=$src_dir/words.txt $src_dir/L.fst | head
+) || true
+echo Performing further checks
+
+# Checking that G.fst is determinizable.
+fstdeterminize $tgt_dir/G.fst /dev/null || {
+ echo Error determinizing G.
+ exit 1
+}
+
+# Checking that L_disambig.fst is determinizable.
+fstdeterminize $tgt_dir/L_disambig.fst /dev/null || echo Error determinizing L.
+
+# Checking that disambiguated lexicon times G is determinizable
+# Note: we do this with fstdeterminizestar not fstdeterminize, as
+# fstdeterminize was taking forever (presumably relates to a bug
+# in this version of OpenFst that makes determinization slow for
+# some case).
+fsttablecompose $tgt_dir/L_disambig.fst $tgt_dir/G.fst | \
+ fstdeterminizestar >/dev/null || echo Error
+
+# Checking that LG is stochastic:
+fsttablecompose $tgt_dir/L_disambig.fst $tgt_dir/G.fst | \
+ fstisstochastic || echo LG is not stochastic
+
+echo "LM preparation succeeded."
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_merge_dict.sh b/egs/mandarin_bn_bc/s5/local/mandarin_merge_dict.sh
new file mode 100755
index 00000000000..18ddc6d03a8
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_merge_dict.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+# This script merges the gale-tdt lexicon directory with gigaword (simplified Mandarin)
+# lexicon directory. It requires the lexiconp.txt file in both directories
+# since the probabilities in lexiconp.txt may be re-estimated.
+
+if [ $# -ne 3 ];then
+ echo "Usage: $0 <lex-dir-1> <lex-dir-2> <tgt-lex-dir>"
+ echo "E.g., $0 data/local/dict_gale_tdt data/local/dict_giga data/local/dict_merged"
+ exit 1
+fi
+
+lex_dir_1=$1
+lex_dir_2=$2
+tgt_lex_dir=$3
+
+mkdir -p $tgt_lex_dir
+
+for f in silence_phones.txt nonsilence_phones.txt lexiconp.txt extra_questions.txt;do
+ [ ! -f $lex_dir_1/$f ] && echo "$0: no such file $lex_dir_1/$f" && exit 1;
+ [ ! -f $lex_dir_2/$f ] && echo "$0: no such file $lex_dir_2/$f" && exit 1;
+ # We copy the phone related files from gale dictionary directory, since they
+ # are the same phone sets as GIGA words.
+ cp $lex_dir_1/$f $tgt_lex_dir
+done
+
+mv $tgt_lex_dir/lexiconp.txt $tgt_lex_dir/lexiconp_1.txt
+
+
+awk 'NR==FNR{a[$1];next}{if (!($1 in a)) print $0}' $tgt_lex_dir/lexiconp_1.txt \
+ $lex_dir_2/lexiconp.txt > $tgt_lex_dir/lexiconp_2.txt
+cat $tgt_lex_dir/lexiconp_1.txt $tgt_lex_dir/lexiconp_2.txt | sort > $tgt_lex_dir/lexiconp.txt
+
+
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_mix_lm.sh b/egs/mandarin_bn_bc/s5/local/mandarin_mix_lm.sh
new file mode 100755
index 00000000000..a5bcf1dc8e0
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_mix_lm.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+# This script interpolates two language models.
+
+ngram_order=4
+oov_sym="<UNK>"
+prune_thres=1e-9
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: $0 [--ngram-order] [--oov-sym] [--prune-thres] <lm-dir-1> <lm-dir-2> <lm-mix-dir> <dev-dir>"
+ echo "E.g. $0 --ngram-order 4 --oov-sym \"<UNK>\" --prune-thres \"1e-9\" \
+ data/local/lm_gale data/local/lm_giga data/local/lm_mix data/dev "
+ exit 1;
+fi
+lm_dir_1=$1
+lm_dir_2=$2
+lm_mix_dir=$3
+heldout=$4/text
+
+mkdir -p $lm_mix_dir || exit 1;
+if [ ! -f $lm_mix_dir/srilm.o${ngram_order}g.kn.gz ]; then
+ for d in $lm_dir_1 $lm_dir_2; do
+ ngram -debug 2 -order $ngram_order -unk -lm $d/srilm.o${ngram_order}g.kn.gz \
+ -ppl $heldout > $d/ppl ;
+ done
+ compute-best-mix $lm_dir_1/ppl $lm_dir_2/ppl > $lm_mix_dir/best-mix.ppl
+ lambdas=$(grep -o '(.*)' $lm_mix_dir/best-mix.ppl | head -1)
+ lambdas=${lambdas%%)}
+ lambdas=${lambdas##(}
+ lambda1=`echo $lambdas | cut -d " " -f1`
+ lambda2=`echo $lambdas | cut -d " " -f2`
+ ngram_opts="$lm_dir_1/srilm.o${ngram_order}g.kn.gz -weight $lambda1 -order \
+ $ngram_order\n$lm_dir_2/srilm.o${ngram_order}g.kn.gz -weight $lambda2 -order $ngram_order"
+ echo -e ${ngram_opts} > $lm_mix_dir/ngram_opts
+ ngram -order $ngram_order \
+ -unk -map-unk $oov_sym \
+ -prune $prune_thres \
+ -read-mix-lms -lm $lm_mix_dir/ngram_opts \
+ -write-lm $lm_mix_dir/srilm.o${ngram_order}g.kn.gz
+ ngram -debug 2 -order $ngram_order -unk \
+ -lm $lm_mix_dir/srilm.o${ngram_order}g.kn.gz \
+ -ppl $heldout > $lm_mix_dir/lm.ppl
+fi
+echo "LM interpolation done"
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_prepare_dict.sh b/egs/mandarin_bn_bc/s5/local/mandarin_prepare_dict.sh
new file mode 100755
index 00000000000..73301f709b0
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_prepare_dict.sh
@@ -0,0 +1,352 @@
+#!/usr/bin/env bash
+
+# prepare dictionary for Mandarin-ASR
+# it is done for English and Chinese separately,
+# For English, we use CMU dictionary, and Sequitur G2P
+# for OOVs, while all English phone set will convert to Chinese
+# phone set at the end. For Chinese, we use an online dictionary,
+# for OOV, we just produce pronunciation using Character Mapping.
+
+extra_text=
+
+. path.sh
+. utils/parse_options.sh || exit 1;
+
+set -e -o pipefail
+if [ $# != 2 ]; then
+ echo "Usage: local/prepare_dict.sh [--extra-text /path/to/extra-text] <dict-dir> <src-dir>"
+ echo "E.g., $0: --extra-text data/local/gigaword/filtered_text data/local/dict data/local/train"
+ exit 1;
+fi
+
+dict_dir=$1
+src_dir=$2
+
+mkdir -p $dict_dir/lexicon-{en,ch}
+
+# extract full vocabulary from train text
+cat $src_dir/text |\
+ awk '{for (i = 2; i <= NF; i++) print $i}' |\
+ perl -ape 's/ /\n/g;' | sort -u | \
+ grep -v '\[LAUGHTER\]' | \
+ grep -v '\[NOISE\]' |\
+ grep -v '\[VOCALIZEDNOISE\]' |\
+ grep -v '\[VOCALIZED-NOISE\]' \
+ > $dict_dir/words_train.txt
+
+# extract vocabulary from extra text
+if [ ! -z $extra_text ];then
+ echo "Using extra text for LM training, add these words for lexicon: $extra_text"
+ cp $extra_text $dict_dir/lm_extra_text_filtered || exit 1;
+ awk '{for (i=1; i <= NF; i++) print $i}' $dict_dir/lm_extra_text_filtered | sort -u |\
+ grep -v '\[LAUGHTER\]' |\
+ grep -v '\[NOISE\]' |\
+ grep -v '\[VOCALIZEDNOISE\]' |\
+ grep -v '\[VOCALIZED-NOISE\]' |\
+ sed -e 's/((\([^)]\{0,\}\)))/\1/g;' |\
+ perl -ape 's/ /\n/g;' | awk 'NF>0' > $dict_dir/extra_words.txt
+ cat $dict_dir/words_train.txt $dict_dir/extra_words.txt | sort -u > $dict_dir/words.txt
+else
+ cp $dict_dir/words_train.txt $dict_dir/words.txt
+fi
+
+# split into English and Chinese
+cat $dict_dir/words.txt | grep '[a-zA-Z]' > $dict_dir/lexicon-en/words-en.txt || exit 1;
+cat $dict_dir/words.txt | grep -v '[a-zA-Z]' > $dict_dir/lexicon-ch/words-ch.txt || exit 1;
+
+
+##### produce pronunciations for english
+if [ ! -f $dict_dir/cmudict/cmudict.0.7a ]; then
+ echo "--- Downloading CMU dictionary ..."
+ svn co -r 13068 https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict \
+ $dict_dir/cmudict || exit 1;
+fi
+
+# format cmudict
+echo "--- Stripping stress and pronunciation variant markers from cmudict ..."
+perl $dict_dir/cmudict/scripts/make_baseform.pl \
+ $dict_dir/cmudict/cmudict.0.7a /dev/stdout |\
+ sed -e 's:^\([^\s(]\+\)([0-9]\+)\(\s\+\)\(.*\):\1\2\3:' > $dict_dir/cmudict/cmudict-plain.txt || exit 1;
+
+# extract in-vocab lexicon and oov words
+echo "--- Searching for English OOV words ..."
+awk 'NR==FNR{words[$1]; next;} !($1 in words)' \
+ $dict_dir/cmudict/cmudict-plain.txt $dict_dir/lexicon-en/words-en.txt |\
+ egrep -v '<.?s>' > $dict_dir/lexicon-en/words-en-oov.txt || exit 1;
+
+awk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+ $dict_dir/lexicon-en/words-en.txt $dict_dir/cmudict/cmudict-plain.txt |\
+ egrep -v '<.?s>' > $dict_dir/lexicon-en/lexicon-en-iv.txt || exit 1;
+
+wc -l $dict_dir/lexicon-en/words-en-oov.txt
+wc -l $dict_dir/lexicon-en/lexicon-en-iv.txt
+
+# setup g2p and generate oov lexicon
+if [ ! -f conf/g2p_model ]; then
+ echo "--- Downloading a pre-trained Sequitur G2P model ..."
+ wget http://sourceforge.net/projects/kaldi/files/sequitur-model4 -O conf/g2p_model
+ if [ ! -f conf/g2p_model ]; then
+ echo "Failed to download the g2p model!"
+ exit 1
+ fi
+fi
+
+echo "--- Preparing pronunciations for OOV words ..."
+g2p=`which g2p.py`
+if [ ! -x $g2p ]; then
+ echo "g2p.py is not found. Checkout tools/extras/install_sequitur.sh."
+ exit 1
+fi
+g2p.py --model=conf/g2p_model --apply $dict_dir/lexicon-en/words-en-oov.txt \
+ > $dict_dir/lexicon-en/lexicon-en-oov.txt || exit 1;
+
+# merge in-vocab and oov lexicon
+cat $dict_dir/lexicon-en/lexicon-en-oov.txt $dict_dir/lexicon-en/lexicon-en-iv.txt |\
+ sort > $dict_dir/lexicon-en/lexicon-en-phn.txt || exit 1;
+
+# convert cmu phoneme to pinyin phonenme
+mkdir -p $dict_dir/map
+cat conf/cmu2pinyin | awk '{print $1;}' | sort -u > $dict_dir/map/cmu || exit 1;
+cat conf/pinyin2cmu | awk -v cmu=$dict_dir/map/cmu \
+ 'BEGIN{while((getline<cmu) > 0) {used[$1]=1;}} {for (i=2; i<=NF; i++) if ($i in used) print $i;}' |\
+ sort -u > $dict_dir/map/cmu-used || exit 1;
+cat $dict_dir/map/cmu | awk -v cmu=$dict_dir/map/cmu-used \
+ 'BEGIN{while((getline<cmu) > 0) {used[$1]=1;}} {if (!($1 in used)) print $1;}' \
+ > $dict_dir/map/cmu-not-used || exit 1;
+
+awk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+ $dict_dir/map/cmu-not-used conf/cmu2pinyin |\
+ egrep -v '<.?s>' > $dict_dir/map/cmu-py || exit 1;
+
+cat $dict_dir/map/cmu-py | \
+ perl -e '
+ open(MAPS, $ARGV[0]) or die("could not open map file");
+ my %py2ph;
+ foreach $line (<MAPS>) {
+ @A = split(" ", $line);
+ $py = shift(@A);
+ $py2ph{$py} = [@A];
+ }
+ my @entry;
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ @entry = ();
+ $W = shift(@A);
+ push(@entry, $W);
+ for($i = 0; $i < @A; $i++) { push(@entry, @{$py2ph{$A[$i]}}); }
+ print "@entry";
+ print "\n";
+ }
+' conf/pinyin2cmu > $dict_dir/map/cmu-cmu || exit 1;
+
+cat $dict_dir/lexicon-en/lexicon-en-phn.txt | \
+ perl -e '
+ open(MAPS, $ARGV[0]) or die("could not open map file");
+ my %py2ph;
+ foreach $line (<MAPS>) {
+ @A = split(" ", $line);
+ $py = shift(@A);
+ $py2ph{$py} = [@A];
+ }
+ my @entry;
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ @entry = ();
+ $W = shift(@A);
+ push(@entry, $W);
+ for($i = 0; $i < @A; $i++) {
+ if (exists $py2ph{$A[$i]}) { push(@entry, @{$py2ph{$A[$i]}}); }
+ else {push(@entry, $A[$i])};
+ }
+ print "@entry";
+ print "\n";
+ }
+' $dict_dir/map/cmu-cmu > $dict_dir/lexicon-en/lexicon-en.txt || exit 1;
+
+
+##### produce pronunciations for chinese
+if [ ! -f $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt ]; then
+ echo "------------- Downloading cedit dictionary ---------------"
+ mkdir -p $dict_dir/cedict
+ wget -P $dict_dir/cedict http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz
+ gunzip $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz
+fi
+
+cat $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt | grep -v '#' | awk -F '/' '{print $1}' |\
+ perl -e '
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ print $A[1];
+ for($n = 2; $n < @A; $n++) {
+ $A[$n] =~ s:\[?([a-zA-Z0-9\:]+)\]?:$1:;
+ $tmp = uc($A[$n]);
+ print " $tmp";
+ }
+ print "\n";
+ }
+ ' | sort -k1 > $dict_dir/cedict/ch-dict.txt || exit 1;
+
+echo "--- Searching for Chinese OOV words ..."
+awk 'NR==FNR{words[$1]; next;} !($1 in words)' \
+ $dict_dir/cedict/ch-dict.txt $dict_dir/lexicon-ch/words-ch.txt |\
+ egrep -v '<.?s>' > $dict_dir/lexicon-ch/words-ch-oov.txt || exit 1;
+
+awk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+ $dict_dir/lexicon-ch/words-ch.txt $dict_dir/cedict/ch-dict.txt |\
+ egrep -v '<.?s>' > $dict_dir/lexicon-ch/lexicon-ch-iv.txt || exit 1;
+
+wc -l $dict_dir/lexicon-ch/words-ch-oov.txt
+wc -l $dict_dir/lexicon-ch/lexicon-ch-iv.txt
+
+
+# validate Chinese dictionary and compose a char-based
+# dictionary in order to get OOV pronunciations
+cat $dict_dir/cedict/ch-dict.txt |\
+ perl -e '
+ use utf8;
+ binmode(STDIN,":encoding(utf8)");
+ binmode(STDOUT,":encoding(utf8)");
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ $word_len = length($A[0]);
+ $proun_len = @A - 1 ;
+ if ($word_len == $proun_len) {print $_;}
+ }
+ ' > $dict_dir/cedict/ch-dict-1.txt || exit 1;
+
+# extract chars
+cat $dict_dir/cedict/ch-dict-1.txt | awk '{print $1}' |\
+ perl -e '
+ use utf8;
+ binmode(STDIN,":encoding(utf8)");
+ binmode(STDOUT,":encoding(utf8)");
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ @chars = split("", $A[0]);
+ foreach (@chars) {
+ print "$_\n";
+ }
+ }
+ ' | grep -v '^$' > $dict_dir/lexicon-ch/ch-char.txt || exit 1;
+
+# extract individual pinyins
+cat $dict_dir/cedict/ch-dict-1.txt |\
+ awk '{for(i=2; i<=NF; i++) print $i}' |\
+ perl -ape 's/ /\n/g;' > $dict_dir/lexicon-ch/ch-char-pinyin.txt || exit 1;
+
+# first make sure number of characters and pinyins
+# are equal, so that a char-based dictionary can
+# be composed.
+nchars=`wc -l < $dict_dir/lexicon-ch/ch-char.txt`
+npinyin=`wc -l < $dict_dir/lexicon-ch/ch-char-pinyin.txt`
+if [ $nchars -ne $npinyin ]; then
+ echo "Found $nchars chars and $npinyin pinyin. Please check!"
+ exit 1
+fi
+
+paste $dict_dir/lexicon-ch/ch-char.txt $dict_dir/lexicon-ch/ch-char-pinyin.txt > $dict_dir/lexicon-ch/ch-char-dict.tmp.txt || exit 1;
+
+# Add "4" and "7" to the character-pinyi pair since they are missing in the
+# mdbg dictionary.
+(echo '4 SI4'; echo '7 QI1'; ) | \
+ cat - $dict_dir/lexicon-ch/ch-char-dict.tmp.txt | sort -u > $dict_dir/lexicon-ch/ch-char-dict.txt || exit 1;
+
+rm $dict_dir/lexicon-ch/ch-char-dict.tmp.txt
+
+# create a multiple pronunciation dictionary
+cat $dict_dir/lexicon-ch/ch-char-dict.txt |\
+ perl -e '
+ my $prev = "";
+ my $out_line = "";
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ $cur = $A[0];
+ $cur_py = $A[1];
+ #print length($prev);
+ if (length($prev) == 0) { $out_line = $_; chomp($out_line);}
+ if (length($prev)>0 && $cur ne $prev) { print $out_line; print "\n"; $out_line = $_; chomp($out_line);}
+ if (length($prev)>0 && $cur eq $prev) { $out_line = $out_line."/"."$cur_py";}
+ $prev = $cur;
+ }
+ print $out_line;
+ ' > $dict_dir/lexicon-ch/ch-char-dict-mp.txt || exit 1;
+
+# get lexicon for Chinese OOV words
+local/create_oov_char_lexicon.pl $dict_dir/lexicon-ch/ch-char-dict-mp.txt \
+ $dict_dir/lexicon-ch/words-ch-oov.txt > $dict_dir/lexicon-ch/lexicon-ch-oov.txt || exit 1;
+
+# separate multiple prons for Chinese OOV lexicon
+cat $dict_dir/lexicon-ch/lexicon-ch-oov.txt |\
+ perl -e '
+ my @entry;
+ my @entry1;
+ while (<STDIN>) {
+ @A = split(" ", $_);
+ @entry = ();
+ push(@entry, $A[0]);
+ for($i = 1; $i < @A; $i++ ) {
+ @py = split("/", $A[$i]);
+ @entry1 = @entry;
+ @entry = ();
+ for ($j = 0; $j < @entry1; $j++) {
+ for ($k = 0; $k < @py; $k++) {
+ $tmp = $entry1[$j]." ".$py[$k];
+ push(@entry, $tmp);
+ }
+ }
+ }
+ for ($i = 0; $i < @entry; $i++) {
+ print $entry[$i];
+ print "\n";
+ }
+ }
+ ' > $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt || exit 1;
+
+# compose IV and OOV lexicons for Chinese
+cat $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt $dict_dir/lexicon-ch/lexicon-ch-iv.txt |\
+ awk '{if (NF > 1 && $2 ~ /[A-Za-z0-9]+/) print $0;}' > $dict_dir/lexicon-ch/lexicon-ch.txt || exit 1;
+
+# convert Chinese pinyin to CMU format
+cat $dict_dir/lexicon-ch/lexicon-ch.txt | sed -e 's/U:/V/g' | sed -e 's/ R\([0-9]\)/ ER\1/g' | grep -v 'M2' |\
+ utils/pinyin_map.pl conf/pinyin2cmu > $dict_dir/lexicon-ch/lexicon-ch-cmu.txt || exit 1;
+
+# combine English and Chinese lexicons
+cat $dict_dir/lexicon-en/lexicon-en.txt $dict_dir/lexicon-ch/lexicon-ch-cmu.txt | awk 'NF>1' |\
+ sort -u > $dict_dir/lexicon1.txt || exit 1;
+
+cat $dict_dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}'| \
+ sort -u |\
+ perl -e '
+ my %ph_cl;
+ while (<STDIN>) {
+ $phone = $_;
+ chomp($phone);
+ chomp($_);
+ $phone =~ s:([A-Z]+)[0-9]:$1:;
+ if (exists $ph_cl{$phone}) { push(@{$ph_cl{$phone}}, $_) }
+ else { $ph_cl{$phone} = [$_]; }
+ }
+ foreach $key ( keys %ph_cl ) {
+ print "@{ $ph_cl{$key} }\n"
+ }
+ ' | sort -k1 > $dict_dir/nonsilence_phones.txt || exit 1;
+
+( echo SIL; echo SPN; echo NSN; echo LAU ) > $dict_dir/silence_phones.txt
+
+echo SIL > $dict_dir/optional_silence.txt
+
+# No "extra questions" in the input to this setup, as we don't
+# have stress or tone
+
+cat $dict_dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > $dict_dir/extra_questions.txt || exit 1;
+cat $dict_dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) {
+ $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \
+ >> $dict_dir/extra_questions.txt || exit 1;
+
+# Add to the lexicon the silences, noises etc.
+(echo '!SIL SIL'; echo '[VOCALIZED-NOISE] SPN'; echo '[VOCALIZEDNOISE] SPN';echo '[NOISE] NSN'; echo '[LAUGHTER] LAU';
+ echo '<UNK> SPN' ) | \
+ cat - $dict_dir/lexicon1.txt > $dict_dir/lexicon.txt || exit 1;
+
+echo "$0: Mandarin dict preparation succeeded"
+exit 0;
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_prepare_lm.sh b/egs/mandarin_bn_bc/s5/local/mandarin_prepare_lm.sh
new file mode 100755
index 00000000000..f9de16d78fb
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_prepare_lm.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+ngram_order=4
+oov_sym="<UNK>"
+no_uttid="false"
+prune_thres=1e-9
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: $0 [--no-uttid] [--ngram-order] [--oov-sym] [--prune-thres] <dict-dir> <local-text-dir> <lm-dir> <dev-dir>"
+ echo "E.g. $0 --no-uttid "true" --ngram-order 4 --oov-sym \"<UNK>\" --prune-thres "1e-9" data/local/dict data/local/train data/local/lm data/dev "
+ exit 1;
+fi
+
+dict_dir=$1
+local_text_dir=$2
+lm_dir=$3
+heldout=$4/text
+
+# check if sri is installed or no
+which ngram-count &>/dev/null
+if [[ $? == 0 ]]; then
+ echo "srilm installed"
+else
+ echo "Please install srilm first !"
+ exit 1
+fi
+echo "Building $ngram_order gram LM"
+[ ! -d $lm_dir ] && { mkdir -p $lm_dir || exit 1; }
+
+if [ ! -f $lm_dir/${ngram_order}gram-mincount/lm_pruned.gz ]; then
+ echo "Training LM with train text"
+ [ ! -f $local_text_dir/text ] && echo "No $local_text_dir/text" && exit 1;
+
+ # If the first column of $local_text_dir/text is uttid, we need to remove
+ # them.
+ if [ $no_uttid == "false" ]; then
+ awk '{i=$2;for (n=3;n<=NF;++n){i=i" "$n;}print i}' $local_text_dir/text > $lm_dir/text
+ else
+ cp $local_text_dir/text $lm_dir/text
+ fi
+ local/train_lms.sh --ngram-order $ngram_order --prune-thres $prune_thres $lm_dir $dict_dir $lm_dir $heldout
+fi
+
+
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_segment.py b/egs/mandarin_bn_bc/s5/local/mandarin_segment.py
new file mode 100755
index 00000000000..31fe135cf91
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_segment.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+#coding:utf-8
+#!/usr/bin/env python
+from __future__ import print_function
+import sys
+from mmseg import seg_txt
+for line in sys.stdin:
+ blks = str.split(line)
+ out_line = ""
+ for i in range(0, len(blks)):
+ if blks[i] == "[VOCALIZED-NOISE]" or blks[i] == "[NOISE]" or blks[i] == "[LAUGHTER]":
+ out_line += " " + blks[i]
+ continue
+ for j in seg_txt(blks[i]):
+ out_line += " " + j
+ print(out_line)
diff --git a/egs/mandarin_bn_bc/s5/local/mandarin_text_normalize.pl b/egs/mandarin_bn_bc/s5/local/mandarin_text_normalize.pl
new file mode 100644
index 00000000000..d587ce0f074
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/mandarin_text_normalize.pl
@@ -0,0 +1,195 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+
+# This script removes special symbols from the transcriptions.
+
+while (<STDIN>) {
+ @A = split(" ", $_);
+ for ($n = 0; $n < @A; $n++) {
+ $a = $A[$n];
+ $tmp = $a;
+ $tmp =~ s:A:A:g;
+ $tmp =~ s:B:B:g;
+ $tmp =~ s:K:K:g;
+ $tmp =~ s:D:D:g;
+ $tmp =~ s:N:N:g;
+ $tmp =~ s:W:W:g;
+ $tmp =~ s:G:G:g;
+ $tmp =~ s:S:S:g;
+ $tmp =~ s:T:T:g;
+ $tmp =~ s:V:V:g;
+ $tmp =~ s:%::g;
+ $tmp =~ s:Ⅱ::g;
+ $tmp =~ s:+::g;
+ $tmp =~ s:-::g;
+ $tmp =~ s:.::g;
+ $tmp =~ s:0:0:g;
+ $tmp =~ s:1:1:g;
+ $tmp =~ s:2:2:g;
+ $tmp =~ s:3:3:g;
+ $tmp =~ s:4:4:g;
+ $tmp =~ s:5:5:g;
+ $tmp =~ s:6:6:g;
+ $tmp =~ s:7:7:g;
+ $tmp =~ s:8:8:g;
+ $tmp =~ s:9:9:g;
+ $tmp =~ s:;::g;
+ $tmp =~ s:<::g;
+ $tmp =~ s:>::g;
+ $tmp =~ s: ::g;
+ $tmp =~ s:、::g;
+ $tmp =~ s:】::g;
+ $tmp =~ s:·::g;
+ $tmp =~ s:〉::g;
+ $tmp =~ s:〈::g;
+ $tmp =~ s:《::g;
+ $tmp =~ s:》::g;
+ $tmp =~ s:"::g;
+ $tmp =~ s:‘::g;
+ $tmp =~ s:’::g;
+ $tmp =~ s:“::g;
+ $tmp =~ s:”::g;
+ $tmp =~ s::::g;
+ $tmp =~ s:(::g;
+ $tmp =~ s:)::g;
+ $tmp =~ s:…::g;
+ $tmp =~ s:!::g;
+ $tmp =~ s:\?::g;
+ $tmp =~ s:-::g;
+ $tmp =~ s:@::g;
+ $tmp =~ s:‰::g;
+ $tmp =~ s:—::g;
+ $tmp =~ s:○::g;
+ $tmp =~ s:,::g;
+ $tmp =~ s:・::g;
+ $tmp =~ s:;::g;
+ $tmp =~ s:\:::g;
+ $tmp =~ s:\(::g;
+ $tmp =~ s:\)::g;
+ $tmp =~ s:□::g;
+ $tmp =~ s: ::g;
+ $tmp =~ s:"::g;
+ $tmp =~ s:#::g;
+ $tmp =~ s:*::g;
+ $tmp =~ s:/::g;
+ $tmp =~ s:E::g;
+ $tmp =~ s:H::g;
+ $tmp =~ s:M::g;
+ $tmp =~ s:X::g;
+ $tmp =~ s:[::g;
+ $tmp =~ s:]::g;
+ $tmp =~ s:~::g;
+ $tmp =~ s: ̄::g;
+ $tmp =~ s:¥::g;
+ $tmp =~ s:?::g;
+ $tmp =~ s:。::g;
+ $tmp =~ s:!::g;
+ $tmp =~ s:,::g;
+ $tmp =~ s:§::g;
+ $tmp =~ s:¨::g;
+ $tmp =~ s:°::g;
+ $tmp =~ s:±::g;
+ $tmp =~ s:×::g;
+ $tmp =~ s:÷::g;
+ $tmp =~ s:ā::g;
+ $tmp =~ s:ǎ::g;
+ $tmp =~ s:ˉ::g;
+ $tmp =~ s:Ι::g;
+ $tmp =~ s:Υ::g;
+ $tmp =~ s:Φ::g;
+ $tmp =~ s:Χ::g;
+ $tmp =~ s:α::g;
+ $tmp =~ s:β::g;
+ $tmp =~ s:γ::g;
+ $tmp =~ s:ε::g;
+ $tmp =~ s:μ::g;
+ $tmp =~ s:π::g;
+ $tmp =~ s:ρ::g;
+ $tmp =~ s:τ::g;
+ $tmp =~ s:φ::g;
+ $tmp =~ s:χ::g;
+ $tmp =~ s:ψ::g;
+ $tmp =~ s:ω::g;
+ $tmp =~ s:А::g;
+ $tmp =~ s:Б::g;
+ $tmp =~ s:В::g;
+ $tmp =~ s:Г::g;
+ $tmp =~ s:Ж::g;
+ $tmp =~ s:З::g;
+ $tmp =~ s:И::g;
+ $tmp =~ s:Л::g;
+ $tmp =~ s:М::g;
+ $tmp =~ s:Н::g;
+ $tmp =~ s:О::g;
+ $tmp =~ s:П::g;
+ $tmp =~ s:С::g;
+ $tmp =~ s:Ш::g;
+ $tmp =~ s:Э::g;
+ $tmp =~ s:а::g;
+ $tmp =~ s:―::g;
+ $tmp =~ s:′::g;
+ $tmp =~ s:″::g;
+ $tmp =~ s:※::g;
+ $tmp =~ s:℃::g;
+ $tmp =~ s:Ⅰ::g;
+ $tmp =~ s:Ⅲ::g;
+ $tmp =~ s:Ⅳ::g;
+ $tmp =~ s:Ⅴ::g;
+ $tmp =~ s:Ⅵ::g;
+ $tmp =~ s:Ⅶ::g;
+ $tmp =~ s:Ⅷ::g;
+ $tmp =~ s:Ⅸ::g;
+ $tmp =~ s:Ⅹ::g;
+ $tmp =~ s:→::g;
+ $tmp =~ s:∏::g;
+ $tmp =~ s:√::g;
+ $tmp =~ s:∮::g;
+ $tmp =~ s:∶::g;
+ $tmp =~ s:≈::g;
+ $tmp =~ s:≤::g;
+ $tmp =~ s:≥::g;
+ $tmp =~ s:⊥::g;
+ $tmp =~ s:⌒::g;
+ $tmp =~ s:①::g;
+ $tmp =~ s:②::g;
+ $tmp =~ s:③::g;
+ $tmp =~ s:④::g;
+ $tmp =~ s:⑤::g;
+ $tmp =~ s:⑥::g;
+ $tmp =~ s:⑦::g;
+ $tmp =~ s:⑧::g;
+ $tmp =~ s:⑨::g;
+ $tmp =~ s:⑩::g;
+ $tmp =~ s:⑴::g;
+ $tmp =~ s:⒈::g;
+ $tmp =~ s:⒉::g;
+ $tmp =~ s:⒒::g;
+ $tmp =~ s:─::g;
+ $tmp =~ s:━::g;
+ $tmp =~ s:│::g;
+ $tmp =~ s:┄::g;
+ $tmp =~ s:┅::g;
+ $tmp =~ s:┘::g;
+ $tmp =~ s:┼::g;
+ $tmp =~ s:╃::g;
+ $tmp =~ s:■::g;
+ $tmp =~ s:△::g;
+ $tmp =~ s:◆::g;
+ $tmp =~ s:●::g;
+ $tmp =~ s:☆::g;
+ $tmp =~ s:〃::g;
+ $tmp =~ s:「::g;
+ $tmp =~ s:」::g;
+ $tmp =~ s:【::g;
+ $tmp =~ s:〓::g;
+ $tmp =~ s:〔::g;
+ $tmp =~ s:〕::g;
+ $tmp =~ s:〖::g;
+ $tmp =~ s:〗::g;
+ $tmp =~ s:ぃ::g;
+ if ($tmp =~ /[^.]{0,}\.+/) {$tmp =~ s:\.:点:g;}
+ if ($tmp =~ /[a-zA-Z]/) {$tmp=uc($tmp);}
+ print "$tmp ";
+ }
+ print "\n";
+}
diff --git a/egs/mandarin_bn_bc/s5/local/nnet3/run_ivector_common.sh b/egs/mandarin_bn_bc/s5/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..643167a4bae
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,163 @@
+#!/usr/bin/env bash
+
+set -e -o pipefail
+
+# This script is called from scripts like local/nnet3/run_tdnn.sh and
+# local/chain/run_tdnn.sh (and may eventually be called by more scripts). It
+# contains the common feature preparation and iVector-related parts of the
+# script. See those scripts for examples of usage.
+
+
+stage=0
+nj=80
+train_set=train_cleanup # you might set this to e.g. train.
+affix="_cleanup"
+gmm=tri6b_cleanup # This specifies a GMM-dir from the features of the type you're training the system on;
+ # it should contain alignments for 'train_set'.
+lang=data/lang_large_test
+num_threads_ubm=32
+num_processes=4
+nnet3_affix="_cleanup" # affix for exp/nnet3 directory to put iVector stuff
+ali_dir=exp/${gmm}_sp_ali
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+for f in data/${train_set}/feats.scp $gmm/final.mdl; do
+ if [ ! -f $f ]; then
+ echo "$0: expected file $f to exist"
+ exit 1
+ fi
+done
+
+if [ $stage -le 1 ]; then
+ mfccdir=mfcc_sp
+ #Although the nnet will be trained by high resolution data, we still have to
+ # perturb the normal data to get the alignment. _sp stands for speed-perturbed
+ echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+ echo "$0: making MFCC features for low-resolution speed-perturbed data"
+ steps/make_mfcc_pitch_online.sh --nj $nj --cmd "$train_cmd" \
+ data/${train_set}_sp exp/make_sp/${train_set}_sp $mfccdir || exit 1
+ steps/compute_cmvn_stats.sh data/${train_set}_sp exp/make_sp/${train_set}_sp $mfccdir
+ echo "$0: fixing input data-dir to remove nonexistent features, in case some "
+ echo ".. speed-perturbed segments were too short."
+ utils/fix_data_dir.sh data/${train_set}_sp
+fi
+
+if [ $stage -le 2 ]; then
+ if [ -f $ali_dir/ali.1.gz ]; then
+ echo "$0: alignments in $ali_dir appear to already exist. Please either remove them "
+ echo " ... or use a later --stage option."
+ exit 1
+ fi
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
+ data/${train_set}_sp $lang $gmm $ali_dir || exit 1
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: creating high-resolution MFCC features"
+
+ # this shows how you can split across multiple file-systems. we'll split the
+ # MFCC dir across multiple locations. You might want to be careful here, if you
+ # have multiple copies of Kaldi checked out and run the same recipe, not to let
+ # them overwrite each other.
+ mfccdir=mfcc_hires_sp
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/mandarin-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in ${train_set}_sp dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires
+
+ for datadir in ${train_set}_sp_hires dev_hires eval_hires; do
+ steps/make_mfcc_pitch_online.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir} exp/make_hires_sp/$datadir $mfccdir || exit 1
+ steps/compute_cmvn_stats.sh data/${datadir} exp/make_hires_sp/$datadir $mfccdir
+ utils/fix_data_dir.sh data/${datadir}
+
+ # make MFCC data dir without pitch to extract iVector
+ utils/data/limit_feature_dim.sh 0:39 data/${datadir} data/${datadir}_nopitch || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_nopitch exp/make_hires_sp/${datadir}_nopitch $mfccdir || exit 1;
+ done
+fi
+
+train_set=${train_set}_sp_hires_nopitch
+if [ $stage -le 3 ]; then
+ echo "Stage 3: train_set is $train_set"
+ echo "$0: computing a subset of data to train the diagonal UBM."
+ mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
+ temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
+
+ # train a diagonal UBM using a subset of about a quarter of the data
+ num_utts_total=$(wc -l max_prons:
+ values_sorted = sorted(values, key=lambda v:v[0], reverse=True)
+ values = values_sorted[:max_prons]
+ for v in values:
+ print(key, " ".join(v))
+
+
+
diff --git a/egs/mandarin_bn_bc/s5/local/rnnlm/run_tdnn_lstm_1a.sh b/egs/mandarin_bn_bc/s5/local/rnnlm/run_tdnn_lstm_1a.sh
new file mode 100644
index 00000000000..1b433890007
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/rnnlm/run_tdnn_lstm_1a.sh
@@ -0,0 +1,136 @@
+#!/usr/bin/env bash
+
+# Copyright 2012 Johns Hopkins University (author: Daniel Povey)
+# 2018 Ke Li
+
+
+# Begin configuration section.
+
+dir=exp/rnnlm_lstm_1a
+embedding_dim=1024
+lstm_rpd=256
+lstm_nrpd=256
+stage=-10
+train_stage=-10
+epochs=4
+
+# variables for lattice rescoring
+run_lat_rescore=true
+run_nbest_rescore=true
+run_backward_rnnlm=false
+ac_model_dir=exp/chain_cleanup/tdnn_1d_sp
+decode_dir_suffix=rnnlm_1a
+ngram_order=4 # approximate the lattice-rescoring by limiting the max-ngram-order
+ # if it's set, it merges histories in the lattice if they share
+ # the same ngram history and this prevents the lattice from
+ # exploding exponentially
+pruned_rescore=true
+. path.sh
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+text=data/local/lm_large_4gram/train_text.gz
+lexicon=data/lang_large_test/words.txt
+text_dir=data/rnnlm/text
+mkdir -p $dir/config
+set -e
+for f in $lexicon; do
+ [ ! -f $f ] && \
+ echo "$0: expected file $f to exist; search for run.sh in run.sh" && exit 1
+done
+
+if [ $stage -le 0 ]; then
+ mkdir -p $text_dir
+ echo -n >$text_dir/dev.txt
+ # hold out one in every 2000 lines as dev data.
+ gunzip -c $text | cut -d ' ' -f2- | awk -v text_dir=$text_dir '{if(NR%2000 == 0) { print >text_dir"/dev.txt"; } else {print;}}' >$text_dir/mandarin.txt
+fi
+
+if [ $stage -le 1 ]; then
+ cp $lexicon $dir/config/
+ n=`cat $dir/config/words.txt | wc -l`
+ echo " $n" >> $dir/config/words.txt
+
+ # words that are not present in words.txt but are in the training or dev data, will be
+ # mapped to during training.
+ echo "" >$dir/config/oov.txt
+
+ cat > $dir/config/data_weights.txt <" \
+ --data-weights-file=$dir/config/data_weights.txt \
+ $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt
+
+ # choose features
+ rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \
+ --top-word-features=5000 \
+ --use-constant-feature=true \
+ --special-words=',,,,[VOCALIZED-NOISE],[NOISE],[LAUGHTER]' \
+ $dir/config/words.txt > $dir/config/features.txt
+
+ cat >$dir/config/xconfig < "
+ echo " Options:"
+ echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
+ echo " --stage (0|1|2) # start scoring script from part-way through."
+ echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)."
+ echo " --min_lmwt # minumum LM-weight for lattice rescoring "
+ echo " --max_lmwt # maximum LM-weight for lattice rescoring "
+ exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $dir/lat.1.gz $data/text; do
+ [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+
+ref_filtering_cmd="cat"
+[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter"
+[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter"
+hyp_filtering_cmd="cat"
+[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter"
+[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter"
+
+
+if $decode_mbr ; then
+ echo "$0: scoring with MBR, word insertion penalty=$word_ins_penalty"
+else
+ echo "$0: scoring with word insertion penalty=$word_ins_penalty"
+fi
+
+
+mkdir -p $dir/scoring_kaldi
+cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1;
+if [ $stage -le 0 ]; then
+
+ for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ mkdir -p $dir/scoring_kaldi/penalty_$wip/log
+
+ if $decode_mbr ; then
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \
+ acwt=\`perl -e \"print 1.0/LMWT\"\`\; \
+ lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+ lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
+ lattice-prune --beam=$beam ark:- ark:- \| \
+ lattice-mbr-decode --word-symbol-table=$symtab \
+ ark:- ark,t:- \| \
+ utils/int2sym.pl -f 2- $symtab \| \
+ $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1;
+
+ else
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \
+ lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+ lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
+ lattice-best-path --word-symbol-table=$symtab ark:- ark,t:- \| \
+ utils/int2sym.pl -f 2- $symtab \| \
+ $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1;
+ fi
+
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \
+ cat $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \
+ compute-wer --text --mode=present \
+ ark:$dir/scoring_kaldi/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1;
+
+ done
+fi
+
+
+# Starting at stage 2 is intentional, to allow nice coexistence with
+# score_kaldi.sh in case the user combines calls to these two scripts as
+# shown in the example at the top of the file. Otherwise the user would
+# have to filter the script parameters instead of simply forwarding them.
+if [ $stage -le 2 ] ; then
+ files=($dir/scoring_kaldi/test_filt.txt)
+ for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ for lmwt in $(seq $min_lmwt $max_lmwt); do
+ files+=($dir/scoring_kaldi/penalty_${wip}/${lmwt}.txt)
+ done
+ done
+
+ for f in "${files[@]}" ; do
+ fout=${f%.txt}.chars.txt
+ if [ -x local/character_tokenizer ]; then
+ cat $f | local/character_tokenizer > $fout
+ else
+ cat $f | perl -CSDA -ane '
+ {
+ print $F[0];
+ foreach $s (@F[1..$#F]) {
+ if (($s =~ /\[.*\]/) || ($s =~ /\<.*\>/) || ($s =~ "!SIL")) {
+ print " $s";
+ } else {
+ @chars = split "", $s;
+ foreach $c (@chars) {
+ print " $c";
+ }
+ }
+ }
+ print "\n";
+ }' > $fout
+ fi
+ done
+
+ for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.cer.LMWT.log \
+ cat $dir/scoring_kaldi/penalty_$wip/LMWT.chars.txt \| \
+ compute-wer --text --mode=present \
+ ark:$dir/scoring_kaldi/test_filt.chars.txt ark,p:- ">&" $dir/cer_LMWT_$wip || exit 1;
+ done
+fi
+
+if [ $stage -le 3 ] ; then
+ for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
+ for lmwt in $(seq $min_lmwt $max_lmwt); do
+ # adding /dev/null to the command list below forces grep to output the filename
+ grep WER $dir/cer_${lmwt}_${wip} /dev/null
+ done
+ done | utils/best_wer.sh >& $dir/scoring_kaldi/best_cer || exit 1
+
+ best_cer_file=$(awk '{print $NF}' $dir/scoring_kaldi/best_cer)
+ best_wip=$(echo $best_cer_file | awk -F_ '{print $NF}')
+ best_lmwt=$(echo $best_cer_file | awk -F_ '{N=NF-1; print $N}')
+
+ if [ -z "$best_lmwt" ]; then
+ echo "$0: we could not get the details of the best CER from the file $dir/cer_*. Probably something went wrong."
+ exit 1;
+ fi
+
+ if $stats; then
+ mkdir -p $dir/scoring_kaldi/cer_details
+ echo $best_lmwt > $dir/scoring_kaldi/cer_details/lmwt # record best language model weight
+ echo $best_wip > $dir/scoring_kaldi/cer_details/wip # record best word insertion penalty
+
+ $cmd $dir/scoring_kaldi/log/stats1.cer.log \
+ cat $dir/scoring_kaldi/penalty_$best_wip/${best_lmwt}.chars.txt \| \
+ align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.chars.txt ark:- ark,t:- \| \
+ utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/cer_details/per_utt \|\
+ utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/cer_details/per_spk || exit 1;
+
+ $cmd $dir/scoring_kaldi/log/stats2.cer.log \
+ cat $dir/scoring_kaldi/cer_details/per_utt \| \
+ utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \
+ sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/cer_details/ops || exit 1;
+
+ $cmd $dir/scoring_kaldi/log/cer_bootci.cer.log \
+ compute-wer-bootci --mode=present \
+ ark:$dir/scoring_kaldi/test_filt.chars.txt ark:$dir/scoring_kaldi/penalty_$best_wip/${best_lmwt}.chars.txt \
+ '>' $dir/scoring_kaldi/cer_details/cer_bootci || exit 1;
+
+ fi
+fi
+
+# If we got here, the scoring was successful.
+# As a small aid to prevent confusion, we remove all wer_{?,??} files;
+# these originate from the previous version of the scoring scripts.
+# We keep both rm statements here because leaving stale files around could
+# lead to confusion about the capabilities of the script.
+rm $dir/wer_{?,??} 2>/dev/null
+rm $dir/cer_{?,??} 2>/dev/null
+
+exit 0;
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_cleanup.sh b/egs/mandarin_bn_bc/s5/local/tdt_cleanup.sh
new file mode 100644
index 00000000000..6a2127fa20e
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_cleanup.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# This script removes non-speech segments, long music, or long silence from
+# the original speech recordings.
+
+nj=32
+stage=0
+cmd=run.pl
+. cmd.sh
+. path.sh
+. utils/parse_options.sh
+
+set -e -o pipefail
+if [ $# -ne 5 ]; then
+ echo "Usage: $0 "
+ echo "E.g., $0 [options] data/train data/lang exp/gale_mandarin data/train_clean"
+ exit 1;
+fi
+
+src_data_dir=$1
+lang_dir=$2
+mdldir=$3
+newdir=$4
+clean_data_dir=$5
+
+steps/cleanup/segment_long_utterances.sh --nj ${nj} --cmd "$train_cmd" --stage $stage \
+ --max-bad-proportion 0.6 $mdldir $lang_dir $src_data_dir \
+ $clean_data_dir $newdir || exit 1;
+
+echo "Clean up succeeded !"
+
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_bad_utts b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_bad_utts
new file mode 100644
index 00000000000..aba1e21e2a9
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_bad_utts
@@ -0,0 +1,3 @@
+19981223_0900_1000_VOA_MAN
+19981230_0900_1000_VOA_MAN
+20001226_2000_2025_CTS_MAN
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_audio.sh b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_audio.sh
new file mode 100755
index 00000000000..1226f2ce918
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_audio.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Copyright 2016 Johns Hopkins Univeersity (author: Jan "Yenda" Trmal)
+# Copyright 2019 Johns Hopkins Univeersity (author: Jinyi Yang)
+# Apache 2.0
+
+
+echo $0 "$@"
+
+tdtData=$(utils/make_absolute.sh "${@: -1}" );
+wavedir=$tdtData/wav
+mkdir -p $wavedir
+
+
+length=$(($#-1))
+args=${@:1:$length}
+
+# Check if sph2pipe is installed
+sph2pipe=`which sph2pipe` || sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
+[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1;
+set -e -o pipefail
+
+for var in $args; do
+  CD=$(basename $var)
+  [ -d $wavedir/$CD ] && rm -rf $wavedir/$CD
+  mkdir -p $wavedir/$CD
+  find $var -type f -name "*.sph" | grep "MAN" | while read file; do
+    f=$(basename $file)
+    if [[ ! -L "$wavedir/$CD/$f" ]]; then
+      ln -sf $file $wavedir/$CD/$f
+    fi
+  done
+done
+
+#figure out the proper sph2pipe command line
+(
+  for w in `find $wavedir -name "*.sph"` ; do
+    base=`basename $w .sph`
+    fullpath=`utils/make_absolute.sh $w`
+    echo "$base $sph2pipe -f wav -p -c 1 $fullpath |"
+  done
+) | sort -u > $tdtData/wav.scp
+
+#clean
+rm -fr $tdtData/id$$ $tdtData/wav$$
+echo "$0: data prep audio succeeded"
+
+exit 0
+
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_filter.sh b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_filter.sh
new file mode 100644
index 00000000000..cb1f4feca7a
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_filter.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 (author: Jinyi Yang)
+# Apache 2.0
+
+# This script removes bad utterances from the TDT corpus.
+. path.sh
+. ./utils/parse_options.sh
+if [ $# != 2 ]; then
+ echo "Usage: $0 [options] ";
+ echo "e.g.: $0 TDT2 data/local/tdt2"
+ exit 1;
+fi
+
+set -e -o pipefail
+
+tdtdir=$1
+tgtdir=$2
+mkdir -p $tgtdir
+
+
+for f in "text" "utt2spk" "segments" "uttid"; do
+ cat $tdtdir/txt/$f | grep -v -F -f local/tdt_mandarin_bad_utts > $tgtdir/$f
+done
+
+awk 'NR==FNR{a[$2];next} $1 in a{print $0}' $tgtdir/segments $tdtdir/wav.scp | \
+grep -v -F -f local/tdt_mandarin_bad_utts > $tgtdir/wav.scp
+
+utils/utt2spk_to_spk2utt.pl $tgtdir/utt2spk | sort -u > $tgtdir/spk2utt
+
+echo "TDT data prepare succeeded !"
+
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_txt.sh b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_txt.sh
new file mode 100755
index 00000000000..4fd04acaaaa
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_data_prep_txt.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+. ./path.sh || exit 1;
+
+echo $0 "$@"
+export LC_ALL=C
+
+tdtData=$(utils/make_absolute.sh "${@: -1}" );
+
+length=$(($#-1))
+args=${@:1:$length}
+
+top_pwd=`pwd`
+txtdir=$tdtData/txt
+sph_scp=$tdtData/wav.scp
+mkdir -p $txtdir
+
+cd $txtdir
+
+for cdx in ${args[@]}; do
+  echo "Preparing $cdx"
+  if [[ $cdx == *.tgz ]] ; then
+    tar -zxf $cdx
+  elif [ -d "$cdx" ]; then
+    tgt=$(basename $cdx)
+    zfile=`find $cdx -type f -name "*.tgz"`
+    if [ ! -z "$zfile" ]; then
+      test -x $tgt || mkdir $tgt
+      cd $tgt
+      tar -zxf $zfile
+      cd $txtdir
+    else
+      test -x $tgt || ln -s $cdx `basename $tgt`
+    fi
+  else
+    echo "I don't really know what I shall do with $cdx " >&2
+  fi
+done
+
+# There are more transcriptions than audio files. We only use the
+# transcriptions which have corresponding audio files.
+find -L $txtdir -type f -name "*.src_sgm" | grep "MAN" | \
+  awk 'NR==FNR {a[$1];next}; {name=$0;gsub(".src_sgm$", "", name); gsub(".*/", "", name); \
+  if (name in a) print $0}' $sph_scp - | sort > $txtdir/trans.flist || exit 1;
+
+perl $top_pwd/local/tdt_mandarin_parse_sgm.pl $txtdir/trans.flist > $txtdir/text.tmp || exit 1;
+cd $top_pwd
+
+cut -d " " -f1 $txtdir/text.tmp > $txtdir/uttid
+cut -d " " -f2- $txtdir/text.tmp > $txtdir/trans
+
+pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'`
+export PYTHONPATH=$PYTHONPATH:`pwd`/tools/mmseg-1.3.0/lib/python${pyver}/site-packages
+if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "--- Downloading mmseg-1.3.0 ..."
+ echo "NOTE: it assumes that you have Python, Setuptools installed on your system!"
+ wget -P tools http://pypi.python.org/packages/source/m/mmseg/mmseg-1.3.0.tar.gz
+ tar xf tools/mmseg-1.3.0.tar.gz -C tools
+ cd tools/mmseg-1.3.0
+ mkdir -p lib/python${pyver}/site-packages
+ CC=gcc CXX=g++ python setup.py build
+ python setup.py install --prefix=.
+ cd ../..
+ if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then
+ echo "mmseg is not found - installation failed?"
+ exit 1
+ fi
+fi
+# Create text, use mmseg for splitting Mandarin characters into words.
+cat $txtdir/trans |\
+ sed -e 's/,//g' | \
+ sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\
+ perl local/mandarin_text_normalize.pl |\
+ python local/mandarin_segment.py |\
+ sed -e 's/THISISSPKTURN//g' |\
+ paste $txtdir/uttid - |\
+ awk '{if (NF>2 || (NF==2 && $2 != "")) print $0}' > $txtdir/text_with_spk_turn
+
+# The text_with_spk_turn file contains label "" to indicate speaker
+# switching, in case the speaker diarization process is required. We do not use
+# speaker diarization at this moment, so the spk id will be the segment
+# (utterance)
+
+cat $txtdir/text_with_spk_turn | sed 's///g' > $txtdir/text
+awk '{print $1" "$1}' $txtdir/text_with_spk_turn > $txtdir/utt2spk
+cp $txtdir/utt2spk $txtdir/spk2utt
+
+awk '{segments=$1; split(segments, S, "_"); uttid=S[1];for (i=2;i<=5;++i) uttid=uttid"_"S[i]; print segments " " uttid " " S[7]/100 " " S[8]/100}' < $txtdir/text > $txtdir/segments
+
+awk '{print $1}' $txtdir/text > $txtdir/uttid
+
+echo "TDT Mandarin text preparation succeeded!"
diff --git a/egs/mandarin_bn_bc/s5/local/tdt_mandarin_parse_sgm.pl b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_parse_sgm.pl
new file mode 100755
index 00000000000..3889aa02616
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/local/tdt_mandarin_parse_sgm.pl
@@ -0,0 +1,170 @@
+#!/usr/bin/env perl
+
+#===============================================================================
+# Copyright (c) 2019 Johns Hopkins University (Author: Jinyi Yang)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Encode;
+use Time::Piece;
+
+require HTML::Parser or die "This script needs HTML::Parser from CPAN";
+HTML::Parser->import();
+
+binmode(STDOUT, ":utf8");
+
+sub trim { my $s = shift; $s =~ s/^\s+|\s+$//g; return $s };
+
+sub get_doc_no {
+ my $tag = shift(@_);
+ my @tmpdoc = split /\s+/, $tag;
+ my @doc_nos = split /\./, $tmpdoc[1];
+ return @doc_nos;
+}
+
+sub check_doc_type {
+ my $tag = shift(@_);
+ if ( $tag =~ /UNTRANSCRIBED/){
+ return 0;
+ ;
+ } else {
+ return 1;
+ }
+}
+
+sub str2time {
+ my ($str) = @_;
+ $str =~ s/(\.[0-9]+)?\z//;
+ my $fraction = $1 || 0;
+ return Time::Piece->strptime($str, '%H:%M:%S')->epoch + $fraction;
+}
+
+sub get_time_tag{
+ my $start = shift(@_);
+ my $end = shift(@_);
+ if (($start ne "") && ($end ne "")) {
+ $start = sprintf("%.2f", $start);
+ $end = sprintf("%.2f", $end);
+ my $tag = sprintf("%06.0f_%06.0f", 100*$start+0.5, 100*$end+0.5);
+ return $tag;
+ ;
+ } else{
+ print STDERR "$0: Empty time tag: $start or $end\n";
+ return "";
+ }
+}
+
+if (@ARGV != 1) {
+ print STDERR "$0: This script needs exactly one parameter (list of SGML files)\n";
+ print STDERR " Usage: $0 \n";
+ print STDERR " where\n";
+ print STDERR " is a file containing the official SGML format\n";
+ print STDERR " transcripts. The files are parsed and the parsed representation\n";
+ print STDERR " is dumped to STDOUT (one utterance + the additional data fields\n";
+ print STDERR " per line (we dump all the fields, but not all fields are used\n";
+ print STDERR " in the recipe).\n";
+ die;
+}
+my $filelist=$ARGV[0];
+
+my $p = HTML::Parser->new();
+
+my @files=();
+open(F, '<', $filelist) or die "Could not open file $filelist: $?\n";
+while() {
+ chomp;
+ push @files, $_;
+}
+
+foreach my $file (@files) {
+ my $filename = "";
+ my $docname = "";
+ my $doctype = "";
+ my @docno = ();
+ my $doc_id = "";
+ my @text = ();
+ my $start_time = 0;
+ my $end_time = 0;
+ my $doc_start_time = 0;
+ my $current_time = 0;
+ my @times = ();
+
+ my $sgml_file = `basename $file`;
+ $sgml_file = trim $sgml_file;
+ $sgml_file =~ s/\.src_sgm$//g;
+ my @sgml_file_ids = split '_', $sgml_file;
+ my $sgml_file_id = $sgml_file_ids[3].$sgml_file_ids[0].$sgml_file_ids[1];
+
+ open(my $f, '<:encoding(iso-8859-1)', $file) or die "Could not open file $file: $?\n";
+ while(my $line = <$f>) {
+ $line = trim $line;
+ next unless $line;
+
+ if ($line =~ //) {
+ @docno = get_doc_no $line;
+ $doc_id = $docno[0].$docno[1];
+ $doc_id = $docno[2]; # Four digits
+ ;
+ } elsif($line =~ // ){
+ @times = split /\s+/, $line;
+ $current_time = str2time($times[2]);
+ if ($doc_start_time == 0){
+ $doc_start_time = $current_time;
+ $start_time = 0;
+ ;
+ } else {
+ $start_time = $current_time - $doc_start_time;
+ }
+ ;
+ } elsif ($line =~ //){
+ $line = "THISISSPKTURN"; # Replace with a word, indicating speaker change, will be removed from text before LM training
+ push @text, $line;
+ ;
+ } elsif($line =~ //){
+ @times = split /\s+/, $line;
+ $end_time = str2time($times[2]) - $doc_start_time;
+ ;
+ } elsif ($line =~ //) {
+ $doctype = check_doc_type $line;
+ ;
+ } elsif ($line eq "<\/DOC>") {
+ if ((@text > 0) && ($doctype)) {
+ if ($end_time <= $start_time){
+ print STDERR "$0: WARNING: File $file has invalid time tag at $doc_id\n";
+ }
+ my $time_tag = get_time_tag($start_time, $end_time);
+ $docname = $sgml_file."_".$doc_id."_".$time_tag;
+ print "$docname ";
+ print join(" ", @text) . "\n";
+ }
+ $docname = "";
+ @text = ();
+ ;
+ } elsif ($line !~ "<") {
+ $line = trim $line;
+ $line = decode("gbk", $line);
+ $line =~ s:〈turn〉:THISISSPKTURN:g;
+ $line =~ s::THISISSPKTURN:g;
+ $line =~ s:turn>:THISISSPKTURN:g;
+ $line =~ s:"
+prune_thres=1e-9
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: [--ngram-order] [--prune-thres] "
+ echo "E.g. $0 --ngram-order 4 --prune-thres 1e-9 data/local/train data/local/dict
+ data/local/lm_no_extra datal/local/dev/text"
+ exit 1
+fi
+
+text=$1/text
+dict_dir=$2
+dir=$3
+dev_text=$4
+
+
+mkdir -p $dir || exit 1;
+[ ! -f $text ] && echo "$0: No such file $text" && exit 1;
+
+lexicon=$dict_dir/lexicon.txt
+[ ! -f $lexicon ] && echo "$0: No such file $lexicon" && exit 1;
+
+
+cleantext=$dir/text.no_oov
+
+cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } }
+ {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ",$n);} } printf("\n");}' \
+ > $cleantext || exit 1;
+
+
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | sort | uniq -c | \
+ sort -nr > $dir/word.counts || exit 1;
+
+
+# Get counts from acoustic training transcripts, and add one-count
+# for each word in the lexicon (but not silence, we don't want it
+# in the LM-- we'll add it optionally later).
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | \
+ cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
+ sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
+
+cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map \
+ || exit 1;
+
+cat $dir/word_map | awk '{print $1}' | cat - <(echo ""; echo "" ) \
+ > $dir/wordlist
+
+ngram-count -text $dir/text.no_oov -order $ngram_order -limit-vocab -vocab $dir/wordlist -unk \
+ -map-unk "" -kndiscount -interpolate -prune $prune_thres -lm $dir/srilm.o${ngram_order}g.kn.gz
+
+cut -d " " -f2- $dev_text > $dir/heldout
+ngram -lm $dir/srilm.o${ngram_order}g.kn.gz -ppl $dir/heldout > $dir/ppl
+# note: output is
+# $dir/${ngram_order}gram-mincount/lm_unpruned.gz
+echo train lm succeeded
diff --git a/egs/mandarin_bn_bc/s5/path.sh b/egs/mandarin_bn_bc/s5/path.sh
new file mode 100644
index 00000000000..e875e4b585c
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=$(pwd)/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/kaldi_lm:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+. $KALDI_ROOT/tools/env.sh
+export LC_ALL=C
diff --git a/egs/mandarin_bn_bc/s5/rnnlm b/egs/mandarin_bn_bc/s5/rnnlm
new file mode 120000
index 00000000000..e136939ba72
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/rnnlm
@@ -0,0 +1 @@
+../../../scripts/rnnlm/
\ No newline at end of file
diff --git a/egs/mandarin_bn_bc/s5/run.sh b/egs/mandarin_bn_bc/s5/run.sh
new file mode 100644
index 00000000000..da183636b7c
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/run.sh
@@ -0,0 +1,278 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (author: Jinyi Yang)
+# Apache 2.0
+
+train_nj=80
+decode_nj=60
+stage=-1
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh
+
+GALE_AUDIO=(
+ /export/corpora/LDC/LDC2013S08/
+ /export/corpora/LDC/LDC2013S04/
+ /export/corpora/LDC/LDC2014S09/
+ /export/corpora/LDC/LDC2015S06/
+ /export/corpora/LDC/LDC2015S13/
+ /export/corpora/LDC/LDC2016S03/
+ /export/corpora/LDC/LDC2017S25/
+)
+GALE_TEXT=(
+ /export/corpora/LDC/LDC2013T20/
+ /export/corpora/LDC/LDC2013T08/
+ /export/corpora/LDC/LDC2014T28/
+ /export/corpora/LDC/LDC2015T09/
+ /export/corpora/LDC/LDC2015T25/
+ /export/corpora/LDC/LDC2016T12/
+ /export/corpora/LDC/LDC2017T18/
+)
+
+TDT_AUDIO=(
+ /export/corpora/LDC/LDC2001S93/
+ /export/corpora/LDC/LDC2001S95/
+ /export/corpora/LDC/LDC2005S11/
+)
+TDT_TEXT=(
+ /export/corpora/LDC/LDC2001T57/
+ /export/corpora/LDC/LDC2001T58/
+ /export/corpora5/LDC/LDC2005T16/
+)
+
+GIGA_TEXT=/export/corpora/LDC/LDC2003T09/gigaword_man/xin/
+
+galeData=GALE/
+tdtData=TDT/
+gigaData=GIGA/
+
+set -e -o pipefail
+set -x
+
+########################### Data preparation ###########################
+if [ $stage -le 0 ]; then
+ echo "`date -u`: Prepare data for GALE"
+ local/gale_data_prep_audio.sh "${GALE_AUDIO[@]}" $galeData
+ local/gale_data_prep_txt.sh "${GALE_TEXT[@]}" $galeData
+ local/gale_data_prep_split.sh $galeData data/local/gale
+
+ echo "`date -u`: Prepare data for TDT"
+ local/tdt_mandarin_data_prep_audio.sh "${TDT_AUDIO[@]}" $tdtData
+ local/tdt_mandarin_data_prep_txt.sh "${TDT_TEXT[@]}" $tdtData
+ local/tdt_mandarin_data_prep_filter.sh $tdtData data/local/tdt_mandarin
+
+ ## Merge transcripts from GALE and TDT for lexicon and LM training
+ mkdir -p data/local/gale_tdt_train
+ cat data/local/gale/train/text data/local/tdt_mandarin/text > data/local/gale_tdt_train/text
+fi
+
+########################### Lexicon preparation ########################
+if [ $stage -le 1 ]; then
+ echo "`date -u`: Prepare dictionary for GALE and TDT"
+ local/mandarin_prepare_dict.sh data/local/dict_gale_tdt data/local/gale_tdt_train
+ local/check_oov_rate.sh data/local/dict_gale_tdt/lexicon.txt \
+ data/local/gale_tdt_train/text > data/local/gale_tdt_train/oov.rate
+ grep "rate" data/local/gale_tdt_train/oov.rate |\
+ awk '$10>0{print "Warning: OOV rate is "$10 ", make sure it is a small number"}'
+ utils/prepare_lang.sh data/local/dict_gale_tdt "" data/local/lang_gale_tdt data/lang_gale_tdt
+fi
+
+########################### LM preparation for GALE ####################
+if [ $stage -le 2 ]; then
+ echo "`date -u`: Creating LM for GALE"
+ local/mandarin_prepare_lm.sh --no-uttid "false" --ngram-order 4 --oov-sym "" --prune_thres "1e-9" \
+ data/local/dict_gale_tdt data/local/gale/train data/local/gale/train/lm_4gram data/local/gale/dev
+ local/mandarin_format_lms.sh data/local/gale/train/lm_4gram/srilm.o4g.kn.gz \
+ data/lang_gale_tdt data/lang_gale_test
+fi
+
+############# Using GALE data to train cleaning up model for TDT #######
+datadir=data/gale
+mfccdir=mfcc/gale
+expdir=exp/gale
+if [ $stage -le 3 ]; then
+ # spread the mfccs over various machines, as this data-set is quite large.
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
+ mfcc=$(basename $mfccdir) # in case was absolute pathname (unlikely), get basename.
+ utils/create_split_dir.pl /export/b{05,06,07,08}/$USER/kaldi-data/egs/gale_asr/s5/$mfcc/storage \
+ $mfccdir/storage
+ fi
+ echo "`date -u`: Extracting GALE MFCC features"
+ for x in train dev eval; do
+ steps/make_mfcc_pitch.sh --cmd "$train_cmd" --nj $train_nj \
+ $datadir/$x exp/make_mfcc/gale/$x $mfccdir
+ utils/fix_data_dir.sh $datadir/$x # some files fail to get mfcc for many reasons
+ steps/compute_cmvn_stats.sh $datadir/$x exp/make_mfcc/gale/$x $mfccdir
+ done
+# Let's create small subsets to make quick flat-start training:
+# train_100k contains about 150 hours of data.
+ utils/subset_data_dir.sh $datadir/train 100000 $datadir/train_100k || exit 1;
+ utils/subset_data_dir.sh --shortest $datadir/train_100k 2000 $datadir/train_2k_short || exit 1;
+ utils/subset_data_dir.sh $datadir/train_100k 5000 $datadir/train_5k || exit 1;
+ utils/subset_data_dir.sh $datadir/train_100k 10000 $datadir/train_10k || exit 1;
+fi
+
+########################### Monophone training #########################
+if [ $stage -le 4 ]; then
+  echo "`date -u`: Monophone training with GALE data"
+ steps/train_mono.sh --boost-silence 1.25 --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train_2k_short data/lang_gale_tdt $expdir/mono || exit 1;
+fi
+
+########################### Tri1 training ##############################
+if [ $stage -le 5 ]; then
+ steps/align_si.sh --boost-silence 1.25 --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train_5k data/lang_gale_tdt $expdir/mono $expdir/mono_ali_5k || exit 1;
+  echo "`date -u`: Tri1 training with GALE data"
+ # train tri1 [first triphone pass]
+ steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
+ 2000 10000 $datadir/train_5k data/lang_gale_tdt $expdir/mono_ali_5k $expdir/tri1 || exit 1;
+ utils/mkgraph.sh data/lang_gale_test $expdir/tri1 $expdir/tri1/graph_gale_test || exit 1;
+ steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ $expdir/tri1/graph_gale_test $datadir/dev $expdir/tri1/decode_gale_dev
+fi
+
+########################### Tri2b training #############################
+if [ $stage -le 6 ]; then
+ steps/align_si.sh --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train_10k data/lang_gale_tdt $expdir/tri1 $expdir/tri1_ali_10k || exit 1;
+  echo "`date -u`: Tri2b training with GALE data"
+ steps/train_lda_mllt.sh --cmd "$train_cmd" \
+ --splice-opts "--left-context=3 --right-context=3" 2500 15000 \
+ $datadir/train_10k data/lang_gale_tdt $expdir/tri1_ali_10k $expdir/tri2b
+ utils/mkgraph.sh data/lang_gale_test $expdir/tri2b $expdir/tri2b/graph_gale_test || exit 1;
+ steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+ $expdir/tri2b/graph_gale_test $datadir/dev $expdir/tri2b/decode_gale_dev
+fi
+
+########################### Tri3b training #############################
+if [ $stage -le 7 ]; then
+  steps/align_si.sh --nj $train_nj --cmd "$train_cmd" --use-graphs true \
+    $datadir/train_10k data/lang_gale_tdt $expdir/tri2b $expdir/tri2b_ali_10k || exit 1;
+  echo "`date -u`: Tri3b training with GALE data"
+  steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
+    $datadir/train_10k data/lang_gale_tdt $expdir/tri2b_ali_10k $expdir/tri3b
+  utils/mkgraph.sh data/lang_gale_test $expdir/tri3b $expdir/tri3b/graph_gale_test || exit 1;
+  steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
+    $expdir/tri3b/graph_gale_test $datadir/dev $expdir/tri3b/decode_gale_dev
+fi
+
+########################### Tri4b training #############################
+if [ $stage -le 8 ]; then
+ steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train_100k data/lang_gale_tdt \
+ $expdir/tri3b $expdir/tri3b_ali_100k || exit 1;
+  echo "`date -u`: Tri4b training with GALE data"
+ steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \
+ $datadir/train_100k data/lang_gale_tdt \
+ $expdir/tri3b_ali_100k $expdir/tri4b
+ utils/mkgraph.sh data/lang_gale_test $expdir/tri4b $expdir/tri4b/graph_gale_test || exit 1;
+ steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
+ $expdir/tri4b/graph_gale_test $datadir/dev $expdir/tri4b/decode_gale_dev
+fi
+
+######################### Re-create lang directory######################
+# We want to add pronunciation probabilities to lexicon, using the previously trained model.
+if [ $stage -le 9 ]; then
+ steps/get_prons.sh --cmd "$train_cmd" \
+ $datadir/train_100k data/lang_gale_tdt $expdir/tri4b
+ utils/dict_dir_add_pronprobs.sh --max-normalize true \
+ data/local/dict_gale_tdt \
+ $expdir/tri4b/pron_counts_nowb.txt $expdir/tri4b/sil_counts_nowb.txt \
+ $expdir/tri4b/pron_bigram_counts_nowb.txt data/local/dict_gale_tdt_reestimated
+ utils/prepare_lang.sh data/local/dict_gale_tdt_reestimated \
+ "" data/local/lang_gale_tdt_reestimated data/lang_gale_tdt_reestimated
+ local/mandarin_format_lms.sh data/local/gale/train/lm_4gram/srilm.o4g.kn.gz \
+ data/lang_gale_tdt_reestimated data/lang_gale_tdt_reestimated_test
+fi
+
+######################### Train tri5b with all GALE data ###############
+if [ $stage -le 10 ]; then
+ steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" \
+ $datadir/train data/lang_gale_tdt_reestimated \
+ $expdir/tri4b $expdir/tri4b_ali_train || exit 1;
+
+ steps/train_sat.sh --cmd "$train_cmd" 5000 100000 \
+ $datadir/train data/lang_gale_tdt_reestimated \
+ $expdir/tri4b_ali_train $expdir/tri5b || exit 1;
+fi
+
+if [ $stage -le 11 ]; then
+ echo "Clean up TDT data"
+ mkdir -p data/tdt || exit 1;
+ mfccdir=mfcc/tdt
+ cp -r data/local/tdt_mandarin/* data/tdt
+ steps/make_mfcc_pitch.sh --cmd "$train_cmd" --nj $train_nj \
+ data/tdt exp/make_mfcc/tdt $mfccdir
+ utils/fix_data_dir.sh data/tdt # some files fail to get mfcc for many reasons
+ steps/compute_cmvn_stats.sh data/tdt exp/make_mfcc/tdt $mfccdir
+ local/tdt_cleanup.sh --nj $train_nj data/tdt data/lang_gale_tdt_reestimated \
+ $expdir/tri5b $expdir/tri5b_tdt_cleanup data/tdt_cleanup
+ sed -i 's///g' data/tdt_cleanup/text
+ steps/compute_cmvn_stats.sh data/tdt_cleanup exp/make_mfcc/tdt_cleanup ${mfccdir}_cleanup
+fi
+
+datadir=data/train_gale_tdt_cleanup
+expdir=exp
+if [ $stage -le 12 ]; then
+ echo "Combine GALE and TDT cleaned"
+ utils/combine_data.sh \
+ $datadir data/gale/train data/tdt_cleanup
+
+ steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" \
+ $datadir data/lang_gale_tdt_reestimated \
+ exp/gale/tri5b exp/gale/tri5b_ali_gale_tdt_cleanup || exit 1;
+
+ steps/train_quick.sh --cmd "$train_cmd" \
+ 7000 150000 $datadir data/lang_gale_tdt_reestimated \
+ exp/gale/tri5b_ali_gale_tdt_cleanup exp/tri6b_cleanup
+ utils/mkgraph.sh data/lang_gale_tdt_reestimated_test exp/tri6b_cleanup \
+ exp/tri6b_cleanup/graph_gale_tdt_reestimated_test || exit 1;
+ steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
+ exp/tri6b_cleanup/graph_gale_tdt_reestimated_test data/gale/dev exp/tri6b_cleanup/decode_gale_dev
+fi
+
+if [ $stage -le 13 ]; then
+ echo "Expand the lexicon with Gigaword"
+ local/gigaword_prepare.sh $GIGA_TEXT $gigaData
+ local/mandarin_prepare_dict.sh data/local/dict_giga_man_simp data/local/giga_man_simp
+ utils/prepare_lang.sh data/local/dict_giga_man_simp "" \
+ data/local/lang_giga_man_simp data/lang_giga_man_simp
+ # Merge the previous dictionary with GIGAWORD dictionary
+ local/mandarin_merge_dict.sh data/local/dict_gale_tdt_reestimated data/local/dict_giga_man_simp data/local/dict_large
+ # Prune the lexicon for multi-pronunciation words
+ python3 local/prune_lex.py data/local/dict_large/lexiconp.txt | \
+ sort > data/local/dict_large/lexiconp.tmp
+ mv data/local/dict_large/lexiconp.tmp data/local/dict_large/lexiconp.txt
+ utils/prepare_lang.sh data/local/dict_large "" \
+ data/local/lang_large data/lang_large
+fi
+
+
+if [ $stage -le 14 ]; then
+ echo "Prepare LM with all data"
+ # Train LM with GALE + TDT
+ local/mandarin_prepare_lm.sh --no-uttid "false" --ngram-order 4 --oov-sym "" --prune_thres "1e-9" \
+ data/local/dict_large data/local/gale_tdt_train data/local/gale_tdt_lm_4gram data/local/gale/dev
+
+ # Train LM with gigaword
+ local/mandarin_prepare_lm.sh --no-uttid "true" --ngram-order 4 --oov-sym "" --prune_thres "1e-9" \
+ data/local/dict_large GIGA/ data/local/giga_lm_4gram data/local/gale/dev
+
+ # LM interpolation
+ local/mandarin_mix_lm.sh --ngram-order 4 --oov-sym "" --prune-thres "1e-9" \
+ data/local/gale_tdt_lm_4gram data/local/giga_lm_4gram data/local/lm_large_4gram data/local/gale/dev
+ local/mandarin_format_lms.sh data/local/lm_large_4gram/srilm.o4g.kn.gz \
+ data/lang_large data/lang_large_test
+fi
+
+# From here, we train a tdnnf model. You should modify the related directories
+# in this script, and in local/nnet3/run_ivector_common.sh
+local/chain/run_tdnn.sh
+
+# We use all GALE+TDT+GIGAWORD text to train RNNLM
+cat local/gale_tdt_lm_4gram/text data/local/giga_lm_4gram/text | gzip > data/local/lm_large_4gram/train_text.gz
+# Train RNNLM. You should modify the related directories in this script.
+local/rnnlm/run_tdnn_lstm_1a.sh
+
diff --git a/egs/mandarin_bn_bc/s5/steps b/egs/mandarin_bn_bc/s5/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/mandarin_bn_bc/s5/utils b/egs/mandarin_bn_bc/s5/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/mandarin_bn_bc/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file
diff --git a/egs/material/s5/local/chain/decode_test.sh b/egs/material/s5/local/chain/decode_test.sh
index 40115a04cf6..d35ee5fd994 100755
--- a/egs/material/s5/local/chain/decode_test.sh
+++ b/egs/material/s5/local/chain/decode_test.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (author: Daniel Povey)
# 2018 Mahsa Yarmohammadi
diff --git a/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh
index 4f38ee886a7..533ca2a2fc5 100755
--- a/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh
index 023cb34b43d..d81e23547ba 100755
--- a/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index af5a62dad0d..711bd593ae2 100755
--- a/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017-2018 Yiming Wang
diff --git a/egs/material/s5/local/g2p/apply_g2p.sh b/egs/material/s5/local/g2p/apply_g2p.sh
index 704a1a906bb..66cd52a91e1 100755
--- a/egs/material/s5/local/g2p/apply_g2p.sh
+++ b/egs/material/s5/local/g2p/apply_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# 2017 Xiaohui Zhang
diff --git a/egs/material/s5/local/g2p/train_g2p.sh b/egs/material/s5/local/g2p/train_g2p.sh
index 43e75f6608d..af6072baef7 100755
--- a/egs/material/s5/local/g2p/train_g2p.sh
+++ b/egs/material/s5/local/g2p/train_g2p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Intellisist, Inc. (Author: Navneeth K)
# 2017 Xiaohui Zhang
diff --git a/egs/material/s5/local/nnet3/run_ivector_common.sh b/egs/material/s5/local/nnet3/run_ivector_common.sh
index a56b3bf67d8..3471834bb75 100755
--- a/egs/material/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/material/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/material/s5/local/prepare_audio_data.sh b/egs/material/s5/local/prepare_audio_data.sh
index 2bf9283f435..ee65d0e47ea 100755
--- a/egs/material/s5/local/prepare_audio_data.sh
+++ b/egs/material/s5/local/prepare_audio_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/material/s5/local/prepare_dict.sh b/egs/material/s5/local/prepare_dict.sh
index 710f1a66e2e..123d9615244 100755
--- a/egs/material/s5/local/prepare_dict.sh
+++ b/egs/material/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/material/s5/local/prepare_text_data.sh b/egs/material/s5/local/prepare_text_data.sh
index 4200a55ed9d..52daa434f87 100755
--- a/egs/material/s5/local/prepare_text_data.sh
+++ b/egs/material/s5/local/prepare_text_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/material/s5/local/preprocess_external_text.sh b/egs/material/s5/local/preprocess_external_text.sh
index 4cbc457310e..83e6988a6f8 100755
--- a/egs/material/s5/local/preprocess_external_text.sh
+++ b/egs/material/s5/local/preprocess_external_text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
set -e -o pipefail
diff --git a/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh b/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh
index 3f5c7e547b1..2c9786fcb0f 100755
--- a/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh
+++ b/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017 Hainan Xu
diff --git a/egs/material/s5/local/rnnlm/run_tdnn_lstm_2.sh b/egs/material/s5/local/rnnlm/run_tdnn_lstm_2.sh
index 13cf0bde44c..4e4314ca3e0 100755
--- a/egs/material/s5/local/rnnlm/run_tdnn_lstm_2.sh
+++ b/egs/material/s5/local/rnnlm/run_tdnn_lstm_2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017 Hainan Xu
diff --git a/egs/material/s5/local/score.sh b/egs/material/s5/local/score.sh
index c7da00fba32..a443efb17dc 100755
--- a/egs/material/s5/local/score.sh
+++ b/egs/material/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# License: Apache 2.0
diff --git a/egs/material/s5/local/score_segments.sh b/egs/material/s5/local/score_segments.sh
index 064e15ae40d..4fb31037bdd 100755
--- a/egs/material/s5/local/score_segments.sh
+++ b/egs/material/s5/local/score_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -o nounset # Treat unset variables as an error
diff --git a/egs/material/s5/local/score_stm.sh b/egs/material/s5/local/score_stm.sh
index 7e1236ce92e..31f1f31a7d6 100755
--- a/egs/material/s5/local/score_stm.sh
+++ b/egs/material/s5/local/score_stm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
# 2018 Vimal Manohar
diff --git a/egs/material/s5/local/score_wer_segments.sh b/egs/material/s5/local/score_wer_segments.sh
index 555ec5056d9..dea325853c1 100755
--- a/egs/material/s5/local/score_wer_segments.sh
+++ b/egs/material/s5/local/score_wer_segments.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
[ -f ./path.sh ] && . ./path.sh
diff --git a/egs/material/s5/local/semisup/chain/decode_test.sh b/egs/material/s5/local/semisup/chain/decode_test.sh
index 3d9a1eda1f5..019e15db01b 100755
--- a/egs/material/s5/local/semisup/chain/decode_test.sh
+++ b/egs/material/s5/local/semisup/chain/decode_test.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (author: Daniel Povey)
# 2018 Mahsa Yarmohammadi
diff --git a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
index 3d3056182ee..b171753ed3e 100755
--- a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
+++ b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2019 Johns Hopkins University (author: Daniel Povey)
# 2017 Vimal Manohar
diff --git a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh
index 37c957a3227..3f5e3b7b97e 100755
--- a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh
+++ b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# 2019 Yiming Wang
diff --git a/egs/material/s5/local/semisup/rnnlm/run_tdnn_lstm.sh b/egs/material/s5/local/semisup/rnnlm/run_tdnn_lstm.sh
index 8fb570ea153..a6cbc79b5da 100755
--- a/egs/material/s5/local/semisup/rnnlm/run_tdnn_lstm.sh
+++ b/egs/material/s5/local/semisup/rnnlm/run_tdnn_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
# 2017 Hainan Xu
diff --git a/egs/material/s5/local/semisup/run.sh b/egs/material/s5/local/semisup/run.sh
index 6b22cb1ad36..a0949267c76 100755
--- a/egs/material/s5/local/semisup/run.sh
+++ b/egs/material/s5/local/semisup/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# 2019 Yiming Wang
diff --git a/egs/material/s5/local/train_lms_srilm.sh b/egs/material/s5/local/train_lms_srilm.sh
index 8160b060dc7..eee6a47eb4d 100755
--- a/egs/material/s5/local/train_lms_srilm.sh
+++ b/egs/material/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
export LC_ALL=C
words_file=
diff --git a/egs/material/s5/run.sh b/egs/material/s5/run.sh
index 4ba518f53e0..c35103e3793 100755
--- a/egs/material/s5/run.sh
+++ b/egs/material/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (Jan "Yenda" Trmal)
# 2017-2018 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index 56aa815ffb9..c4e361e3b4f 100755
--- a/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Adapted from gale_arabic s5b.
diff --git a/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a_disc.sh b/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a_disc.sh
index 290c13e223d..3fbc03e774a 100644
--- a/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a_disc.sh
+++ b/egs/mgb2_arabic/s5/local/chain/tuning/run_tdnn_lstm_1a_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
diff --git a/egs/mgb2_arabic/s5/local/check_tools.sh b/egs/mgb2_arabic/s5/local/check_tools.sh
new file mode 100755
index 00000000000..448a6536946
--- /dev/null
+++ b/egs/mgb2_arabic/s5/local/check_tools.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# check whether bs4 and lxml are installed
+if ! python3 -c "import bs4" 2>/dev/null; then
+ echo "$0: BeautifulSoup4 not installed, you can install it by 'pip install beautifulsoup4' if you prefer to use python to process xml file"
+ exit 1;
+fi
+
+if ! python3 -c "import lxml" 2>/dev/null; then
+ echo "$0: lxml not installed, you can install it by 'pip install lxml' if you prefer to use python to process xml file"
+ exit 1;
+fi
+
+echo "both BeautifulSoup4 and lxml are installed in python"
+exit 0
diff --git a/egs/mgb2_arabic/s5/local/graphgeme_mgb_prep_dict.sh b/egs/mgb2_arabic/s5/local/graphgeme_mgb_prep_dict.sh
index 5a88220a19a..2f7c7a5d592 100755
--- a/egs/mgb2_arabic/s5/local/graphgeme_mgb_prep_dict.sh
+++ b/egs/mgb2_arabic/s5/local/graphgeme_mgb_prep_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
diff --git a/egs/mgb2_arabic/s5/local/mgb_data_prep.sh b/egs/mgb2_arabic/s5/local/mgb_data_prep.sh
index 9d5b3611da8..681894a9e29 100755
--- a/egs/mgb2_arabic/s5/local/mgb_data_prep.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_data_prep.sh
@@ -1,11 +1,11 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# 2016-2019 Vimal Manohar
# 2019 Dongji Gao
-if [ $# -ne 2 ]; then
- echo "Usage: $0 <DB-dir> <mer-sel>"
+if [ $# -ne 3 ]; then
+ echo "Usage: $0 <DB-dir> <mer-sel> <process-xml>"
exit 1;
fi
@@ -23,12 +23,6 @@ for x in $train_dir $dev_dir; do
fi
done
-if [ -z $(which xml) ]; then
- echo "$0: Could not find tool xml"
- echo "$0: Download and install it from xmlstar.sourceforge.net"
- exit 1
-fi
-
find $db_dir/train/wav -type f -name "*.wav" | \
awk -F/ '{print $NF}' | perl -pe 's/\.wav//g' > \
$train_dir/wav_list
@@ -39,11 +33,33 @@ head -500 $train_dir/wav_list > $train_dir/wav_list.short
set -e -o pipefail
xmldir=$db_dir/train/xml/bw
-cat $train_dir/wav_list | while read basename; do
+if [ $process_xml == "python" ]; then
+ echo "using python to process xml file"
+ # check if bs4 and lxml are installed in python
+ local/check_tools.sh
+ # process xml file using python
+ cat $train_dir/wav_list | while read basename; do
[ ! -e $xmldir/$basename.xml ] && echo "Missing $xmldir/$basename.xml" && exit 1
- xml sel -t -m '//segments[@annotation_id="transcript_align"]' -m "segment" -n -v "concat(@who,' ',@starttime,' ',@endtime,' ',@WMER,' ')" -m "element" -v "concat(text(),' ')" $xmldir/$basename.xml | local/add_to_datadir.py $basename $train_dir $mer
- echo $basename $wavDir/$basename.wav >> $train_dir/wav.scp
-done
+ local/process_xml.py $xmldir/$basename.xml - | local/add_to_datadir.py $basename $train_dir $mer
+ done
+elif [ $process_xml == 'xml' ]; then
+ # check if xml binary exsits
+ if command -v xml >/dev/null 2>/dev/null; then
+ echo "using xml"
+ cat $train_dir/wav_list | while read basename; do
+ [ ! -e $xmldir/$basename.xml ] && echo "Missing $xmldir/$basename.xml" && exit 1
+ xml sel -t -m '//segments[@annotation_id="transcript_align"]' -m "segment" -n -v "concat(@who,' ',@starttime,' ',@endtime,' ',@WMER,' ')" -m "element" -v "concat(text(),' ')" $xmldir/$basename.xml | local/add_to_datadir.py $basename $train_dir $mer
+ echo $basename $wavDir/$basename.wav >> $train_dir/wav.scp
+ done
+ else
+ echo "xml not found, you may use python by '--process-xml python'"
+ exit 1;
+ fi
+else
+ # invalid option
+ echo "$0: invalid option for --process-xml, choose from 'xml' or 'python'"
+ exit 1;
+fi
for x in text segments; do
cp $db_dir/dev/${x}.all $dev_dir/${x}
diff --git a/egs/mgb2_arabic/s5/local/mgb_format_data.sh b/egs/mgb2_arabic/s5/local/mgb_format_data.sh
index 0fc24c15add..91dd114938a 100755
--- a/egs/mgb2_arabic/s5/local/mgb_format_data.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
diff --git a/egs/mgb2_arabic/s5/local/mgb_prep_full_data.sh b/egs/mgb2_arabic/s5/local/mgb_prep_full_data.sh
index 30e702c6841..40b464e7a37 100755
--- a/egs/mgb2_arabic/s5/local/mgb_prep_full_data.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_prep_full_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# 2016-2019 Vimal Manohar
diff --git a/egs/mgb2_arabic/s5/local/mgb_prep_original_data.sh b/egs/mgb2_arabic/s5/local/mgb_prep_original_data.sh
index 6edb5ac946d..f3c01c7a57e 100755
--- a/egs/mgb2_arabic/s5/local/mgb_prep_original_data.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_prep_original_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
diff --git a/egs/mgb2_arabic/s5/local/mgb_train_lms.sh b/egs/mgb2_arabic/s5/local/mgb_train_lms.sh
index e49055b478d..7473c92a6ce 100755
--- a/egs/mgb2_arabic/s5/local/mgb_train_lms.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# To be run from one directory above this script.
diff --git a/egs/mgb2_arabic/s5/local/mgb_train_lms_extra.sh b/egs/mgb2_arabic/s5/local/mgb_train_lms_extra.sh
index c29b6e83764..88b618671a4 100755
--- a/egs/mgb2_arabic/s5/local/mgb_train_lms_extra.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_train_lms_extra.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# To be run from one directory above this script.
diff --git a/egs/mgb2_arabic/s5/local/mgb_train_lms_extra_pocolm.sh b/egs/mgb2_arabic/s5/local/mgb_train_lms_extra_pocolm.sh
index b9f82012add..1631985764b 100755
--- a/egs/mgb2_arabic/s5/local/mgb_train_lms_extra_pocolm.sh
+++ b/egs/mgb2_arabic/s5/local/mgb_train_lms_extra_pocolm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Johns Hopkins University (author: Daniel Povey)
# 2017 Vimal Manohar
diff --git a/egs/mgb2_arabic/s5/local/nnet3/run_ivector_common.sh b/egs/mgb2_arabic/s5/local/nnet3/run_ivector_common.sh
index ae2edc27a91..ecfcb780d7c 100755
--- a/egs/mgb2_arabic/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/mgb2_arabic/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
diff --git a/egs/mgb2_arabic/s5/local/process_xml.py b/egs/mgb2_arabic/s5/local/process_xml.py
new file mode 100755
index 00000000000..3c6eed452ac
--- /dev/null
+++ b/egs/mgb2_arabic/s5/local/process_xml.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+from bs4 import BeautifulSoup
+import sys
+import argparse
+
+def get_args():
+ parser = argparse.ArgumentParser(description="""This script process xml file.""")
+ parser.add_argument("xml", type=str, help="""Input xml file""")
+ parser.add_argument("output", type=str, help="""output text file""")
+ args = parser.parse_args()
+ return args
+
+def process_xml(xml_handle, output_handle):
+ soup = BeautifulSoup(xml_handle, "xml")
+ for segment in soup.find_all("segment"):
+ who = segment["who"]
+ starttime = segment["starttime"]
+ endtime = segment["endtime"]
+ WMER = segment["WMER"]
+ text = " ".join([element.string for element in segment.find_all("element") if element.string != None])
+ output_handle.write("{} {} {} {} {}\n".format(who, starttime, endtime, WMER, text))
+ xml_handle.close()
+ output_handle.close()
+
+def main():
+ args = get_args()
+
+ xml_handle = open(args.xml, 'r')
+ output_handle = sys.stdout if args.output == '-' else open(args.output, 'w')
+
+ process_xml(xml_handle, output_handle)
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh b/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh
index 559d20046dd..ac5f0cb9009 100755
--- a/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh
+++ b/egs/mgb2_arabic/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/mgb2_arabic/s5/local/score.sh b/egs/mgb2_arabic/s5/local/score.sh
index 08b67050c01..4531c8f3675 100755
--- a/egs/mgb2_arabic/s5/local/score.sh
+++ b/egs/mgb2_arabic/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/mgb2_arabic/s5/local/score_combine.sh b/egs/mgb2_arabic/s5/local/score_combine.sh
index 576962c7442..1c7796e16ea 100755
--- a/egs/mgb2_arabic/s5/local/score_combine.sh
+++ b/egs/mgb2_arabic/s5/local/score_combine.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 Arnab Ghoshal
diff --git a/egs/mgb2_arabic/s5/local/score_mbr.sh b/egs/mgb2_arabic/s5/local/score_mbr.sh
index 4052512f726..a5ca96a67d6 100755
--- a/egs/mgb2_arabic/s5/local/score_mbr.sh
+++ b/egs/mgb2_arabic/s5/local/score_mbr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Script for minimum bayes risk decoding.
diff --git a/egs/mgb2_arabic/s5/local/score_sclite.sh b/egs/mgb2_arabic/s5/local/score_sclite.sh
index 2c8be28a568..de41053a0c9 100755
--- a/egs/mgb2_arabic/s5/local/score_sclite.sh
+++ b/egs/mgb2_arabic/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
diff --git a/egs/mgb2_arabic/s5/run.sh b/egs/mgb2_arabic/s5/run.sh
index 334aef1bf30..e4192c067b0 100755
--- a/egs/mgb2_arabic/s5/run.sh
+++ b/egs/mgb2_arabic/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (C) 2016, Qatar Computing Research Institute, HBKU
# 2017-19 Vimal Manohar
@@ -6,6 +6,9 @@
stage=-1
+# preference on how to process xml file [python, xml]
+process_xml="python"
+
. ./cmd.sh
if [ -f ./path.sh ]; then . ./path.sh; fi
. utils/parse_options.sh
@@ -50,7 +53,7 @@ fi
if [ $stage -le 1 ]; then
#DATA PREPARATION
echo "Preparing training data"
- local/mgb_data_prep.sh DB $mer
+ local/mgb_data_prep.sh DB $mer $process_xml
fi
if [ $stage -le 2 ]; then
diff --git a/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh
index 6300511e817..1cc68f43a33 100644
--- a/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/mgb5/s5/local/nnet3/run_ivector_common.sh b/egs/mgb5/s5/local/nnet3/run_ivector_common.sh
index b909ed04cde..ddec4419a61 100644
--- a/egs/mgb5/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/mgb5/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/mgb5/s5/local/prepare_data.sh b/egs/mgb5/s5/local/prepare_data.sh
index 36cb4d8fa3f..7ab937f7b45 100755
--- a/egs/mgb5/s5/local/prepare_data.sh
+++ b/egs/mgb5/s5/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 QCRI (Author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/mgb5/s5/local/prepare_lm.sh b/egs/mgb5/s5/local/prepare_lm.sh
index 02fb59aba87..5b47360a730 100755
--- a/egs/mgb5/s5/local/prepare_lm.sh
+++ b/egs/mgb5/s5/local/prepare_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 QCRI (Author: Ahmed Ali)
# Apache 2.0
diff --git a/egs/mgb5/s5/local/score.sh b/egs/mgb5/s5/local/score.sh
index 9988c941441..3ddee8e4b12 100755
--- a/egs/mgb5/s5/local/score.sh
+++ b/egs/mgb5/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
diff --git a/egs/mgb5/s5/local/train_lms_srilm.sh b/egs/mgb5/s5/local/train_lms_srilm.sh
index 6af13921511..ed3200eb103 100755
--- a/egs/mgb5/s5/local/train_lms_srilm.sh
+++ b/egs/mgb5/s5/local/train_lms_srilm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 2019 QCRI (Ahmed Ali)
diff --git a/egs/mgb5/s5/run.sh b/egs/mgb5/s5/run.sh
index 6fc21629f0f..27c4f751eae 100755
--- a/egs/mgb5/s5/run.sh
+++ b/egs/mgb5/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 QCRI (Author:Ahmed Ali)
# Apache 2.0
diff --git a/egs/mini_librispeech/s5/RESULTS b/egs/mini_librispeech/s5/RESULTS
index 0b747120416..94e95e97f99 100755
--- a/egs/mini_librispeech/s5/RESULTS
+++ b/egs/mini_librispeech/s5/RESULTS
@@ -20,3 +20,7 @@ exit 0
%WER 18.58 [ 3742 / 20138, 366 ins, 763 del, 2613 sub ] exp/chain/tdnn1a_sp/decode_tgsmall_dev_clean_2/wer_10_0.0
%WER 13.35 [ 2689 / 20138, 318 ins, 491 del, 1880 sub ] exp/chain/tdnn1a_sp/decode_tglarge_dev_clean_2/wer_9_0.5
+
+# Results with chain2 recipe. Results are w/o final model combination
+%WER 21.38 [ 4305 / 20138, 449 ins, 740 del, 3116 sub ] exp/chain2/tdnn1a_sp/decode_tgsmall_dev_clean_2//wer_10_0.0
+%WER 15.64 [ 3150 / 20138, 395 ins, 584 del, 2171 sub ] exp/chain2/tdnn1a_sp/decode_tglarge_dev_clean_2//wer_11_0.0
diff --git a/egs/mini_librispeech/s5/local/chain/compare_wer.sh b/egs/mini_librispeech/s5/local/chain/compare_wer.sh
index 8ee5db2326a..411d2691bb9 100755
--- a/egs/mini_librispeech/s5/local/chain/compare_wer.sh
+++ b/egs/mini_librispeech/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
index c8f2503b578..636de409f2c 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_cnn_tdnn_1a.sh is modified from run_tdnn_1h.sh, but adding CNN layers
# near the beginning.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1b.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
index 9be405a5e1a..6bcb4f2e9aa 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1b is as 1a but adding SpecAugment and removing dropout (which, in
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh
index da16297c9dd..20ee39095dd 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a basic TDNN experiment.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
index 3d0c2d63902..ab0c30f0da6 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is as 1a but increasing epochs from 4 to 10 and adding the option
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
index 081af8fe2f8..4dbff118902 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1c is as 1b but replacing the renorm with batchnorm components
# (i.e. NormalizeComponent with BatchNormComponent).
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c_discriminative.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c_discriminative.sh
index 1aa519ccb9d..f2c24443744 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c_discriminative.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1c_discriminative.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -o pipefail
set -e
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
index 04df38d4da3..b8944be91c3 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1d is as 1c but adding two non-splicing layers towards the beginning
# of the network.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh
index cdf9bb584f4..13aaf0c12c1 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1e is as 1d but instead of the --proportional-shrink option, using
# the newly added xconfig-layer-specific 'l2-regularize' options.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh
index d1385ff2be5..8d2854247c2 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1f.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1f is as 1e but a smaller model with various tuning changes, the most
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh
index ad51780e191..646f0875e61 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1g is as 1f but adding dropout (well, something like dropout-- the mask
# is shared across time and it's continuous rather than zero-one), increasing
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh
index dbfe5c5a07a..07f6e25473a 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1g20.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1g20 is as 1g but adding the option "--constrained false" to --egs.opts.
# This is the new 'unconstrained egs' code where it uses the e2e examples.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh
index cc4123e2755..5097007e56a 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1h is as 1g but a re-tuned model based on resnet-style TDNN-F layers with
# bypass connections. Below, 1h2 and 1h3 are just reruns of 1h with different
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1i.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1i.sh
index 502c225fa87..1577d7dc10a 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1i.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1i.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1i is as 1h but adding SpecAugment.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1j.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1j.sh
index 7a6604f9773..824de7d7341 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1j.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1j.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1j is as 1i but replaces the LDA layer at the input of the
# network with delta and delta-delta features.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh
index 652f0175558..b4a9ebe1418 100755
--- a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh
+++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1k is like 1j, while it introduces 'apply-cmvn-online' that does
# cmn normalization both for i-extractor and TDNN input.
diff --git a/egs/mini_librispeech/s5/local/chain2/data_prep_common.sh b/egs/mini_librispeech/s5/local/chain2/data_prep_common.sh
new file mode 100755
index 00000000000..21b36cce421
--- /dev/null
+++ b/egs/mini_librispeech/s5/local/chain2/data_prep_common.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# Copyright 2019 Daniel Povey
+# 2019 Srikanth Madikeri (Idiap Research Institute)
+
+set -euo pipefail
+
+# This script is called from local/chain/tuning/run_tdnn_2a.sh and
+# similar scripts. It contains the common feature preparation and
+# lattice-alignment preparation parts of the chaina training.
+# See those scripts for examples of usage.
+
+stage=0
+train_set=train_clean_5
+test_sets="dev_clean_2"
+gmm=tri3b
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+gmm_dir=exp/${gmm}
+ali_dir=exp/${gmm}_ali_${train_set}_sp
+
+for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do
+ if [ ! -f $f ]; then
+ echo "$0: expected file $f to exist"
+ exit 1
+ fi
+done
+
+# Our default data augmentation method is 3-way speed augmentation followed by
+# volume perturbation. We are looking into better ways of doing this,
+# e.g. involving noise and reverberation.
+
+if [ $stage -le 1 ]; then
+ # Although the nnet will be trained by high resolution data, we still have to
+ # perturb the normal data to get the alignment. _sp stands for speed-perturbed
+ echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+ echo "$0: making MFCC features for low-resolution speed-perturbed data"
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/${train_set}_sp || exit 1;
+ steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1;
+ utils/fix_data_dir.sh data/${train_set}_sp
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
+ data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1
+fi
+
+if [ $stage -le 3 ]; then
+ # Create high-resolution MFCC features (with 40 cepstra instead of 13).
+ # this shows how you can split across multiple file-systems.
+ echo "$0: creating high-resolution MFCC features"
+ mfccdir=data/${train_set}_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/fs0{1,2}/$USER/kaldi-data/mfcc/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1;
+
+ for datadir in ${train_set}_sp ${test_sets}; do
+ steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+fi
+
+
+exit 0
diff --git a/egs/mini_librispeech/s5/local/chain2/run_tdnn.sh b/egs/mini_librispeech/s5/local/chain2/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/mini_librispeech/s5/local/chain2/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..2311fc0699e
--- /dev/null
+++ b/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+
+# Copyright 2019 Srikanth Madikeri (Idiap Research Institute)
+#
+# This script is a modification of local/chain/run_tdnn.sh adapted to the chain2 recipes.
+
+# Set -e here so that we catch if any executable fails immediately
+set -euo pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=10
+train_set=train_clean_5
+test_sets=dev_clean_2
+gmm=tri3b
+srand=0
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=2c # affix for the TDNN directory name
+tree_affix=
+train_stage=-10
+get_egs_stage=-10
+
+
+# training chunk-options
+chunk_width=140
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+xent_regularize=0.1
+bottom_subsampling_factor=1 # I'll set this to 3 later, 1 is for compatibility with a broken run.
+frame_subsampling_factor=3
+langs="default" # list of language names
+
+# The amount of extra left/right context we put in the egs. Note: this could
+# easily be zero, since we're not using a recurrent topology, but we put in a
+# little extra context so that we have more room to play with the configuration
+# without re-dumping egs.
+egs_extra_left_context=5
+egs_extra_right_context=5
+
+# The number of chunks (of length: see $chunk_width above) that we group
+# together for each "speaker" (actually: pseudo-speaker, since we may have
+# to group multiple speaker together in some cases).
+chunks_per_group=4
+
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+# if ! cuda-compiled; then
+# cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 11 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 12 ]; then
+ # Build a tree using our new topology. We know we have alignments for the
+ # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+ # those. The num-leaves is always somewhat less than the num-leaves from
+ # the GMM baseline.
+ # This will be a two-level tree (with the smaller number of leaves specified
+ # by the '--num-clusters' option); this is needed by the adaptation framework
+ # search below for 'tree.map'
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ steps/nnet3/chain/build_tree.sh \
+ --frame-subsampling-factor ${frame_subsampling_factor} \
+ --context-opts "--context-width=2 --central-position=1" \
+ --cmd "$train_cmd" 3500 ${lores_train_data_dir} \
+ $lang $ali_dir $tree_dir
+fi
+
+
+# $dir/configs will contain xconfig and config files for the initial
+# models. It's a scratch space used by this script but not by
+# scripts called from here.
+mkdir -p $dir/configs/
+# $dir/init will contain the initial models
+mkdir -p $dir/init/
+
+learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+
+if [ $stage -le 14 ]; then
+
+ # Note: we'll use --bottom-subsampling-factor=3, so all time-strides for the
+ # top network should be interpreted at the 30ms frame subsampling rate.
+ num_leaves=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+
+ echo "$0: creating top model"
+ cat < $dir/configs/default.xconfig
+ input name=input dim=40
+ # the first splicing is moved before the lda layer, so no splicing here
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2) affine-transform-file=$dir/configs/lda.mat
+ relu-renorm-layer name=tdnn1 dim=512 input=Append(-2,-1,0,1,2)
+ relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+ relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1)
+ relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+ relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0)
+ relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5
+ output-layer name=output include-log-softmax=false dim=$num_leaves max-change=1.5
+ output-layer name=output-default input=prefinal-chain include-log-softmax=false dim=$num_leaves max-change=1.5
+ relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5
+ output-layer name=output-xent dim=$num_leaves learning-rate-factor=$learning_rate_factor max-change=1.5
+ output-layer name=output-default-xent input=prefinal-xent dim=$num_leaves learning-rate-factor=$learning_rate_factor max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/default.xconfig --config-dir $dir/configs/
+ if [ ! -f $dir/init/default_trans.mdl ]; then # checking this because it may have been copied in a previous run of the same script
+ copy-transition-model $tree_dir/final.mdl $dir/init/default_trans.mdl || exit 1 &
+ else
+ echo "Keeping the old $dir/init/default_trans.mdl as it already exists."
+ fi
+fi
+wait;
+
+init_info=$dir/init/info.txt
+if [ $stage -le 15 ]; then
+
+ if [ ! -f $dir/configs/ref.raw ]; then
+ echo "Expected $dir/configs/ref.raw to exist"
+ exit
+ fi
+
+ nnet3-info $dir/configs/ref.raw > $dir/configs/temp.info
+ model_left_context=`fgrep 'left-context' $dir/configs/temp.info | awk '{print $2}'`
+ model_right_context=`fgrep 'right-context' $dir/configs/temp.info | awk '{print $2}'`
+ cat >$init_info <)
# License: Apache 2.0
diff --git a/egs/mini_librispeech/s5/local/kws/create_hitlist.sh b/egs/mini_librispeech/s5/local/kws/create_hitlist.sh
index be06a3b9312..8e9ac40090d 100755
--- a/egs/mini_librispeech/s5/local/kws/create_hitlist.sh
+++ b/egs/mini_librispeech/s5/local/kws/create_hitlist.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2018 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/mini_librispeech/s5/local/kws/make_L_align.sh b/egs/mini_librispeech/s5/local/kws/make_L_align.sh
index 72a1e9e3f4c..6c59b779fc7 100755
--- a/egs/mini_librispeech/s5/local/kws/make_L_align.sh
+++ b/egs/mini_librispeech/s5/local/kws/make_L_align.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2018 Johns Hopkins University (authors: Guoguo Chen, Yenda Trmal)
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/mini_librispeech/s5/local/kws/run_kws.sh b/egs/mini_librispeech/s5/local/kws/run_kws.sh
index 8e7b56f0082..82be01ee14e 100755
--- a/egs/mini_librispeech/s5/local/kws/run_kws.sh
+++ b/egs/mini_librispeech/s5/local/kws/run_kws.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2018, Johns Hopkins University (Yenda Trmal )
# License: Apache 2.0
diff --git a/egs/mini_librispeech/s5/local/kws/score.sh b/egs/mini_librispeech/s5/local/kws/score.sh
index b056e150e83..6fea8adadb0 100755
--- a/egs/mini_librispeech/s5/local/kws/score.sh
+++ b/egs/mini_librispeech/s5/local/kws/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2018 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
diff --git a/egs/mini_librispeech/s5/local/kws/search.sh b/egs/mini_librispeech/s5/local/kws/search.sh
index 1c69b0da556..73696e55403 100755
--- a/egs/mini_librispeech/s5/local/kws/search.sh
+++ b/egs/mini_librispeech/s5/local/kws/search.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2018 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# License: Apache 2.0
diff --git a/egs/mini_librispeech/s5/local/lookahead/run_lookahead.sh b/egs/mini_librispeech/s5/local/lookahead/run_lookahead.sh
new file mode 100755
index 00000000000..7afe9cc67be
--- /dev/null
+++ b/egs/mini_librispeech/s5/local/lookahead/run_lookahead.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+. ./path.sh
+
+# Example script for lookahead composition
+
+lm=tgmed
+am=exp/chain_online_cmn/tdnn1k_sp
+testset=dev_clean_2
+
+# %WER 10.32 [ 2078 / 20138, 201 ins, 275 del, 1602 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_base/wer_10_0.5
+# %WER 10.29 [ 2073 / 20138, 200 ins, 272 del, 1601 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_static/wer_10_0.5
+# %WER 10.25 [ 2064 / 20138, 192 ins, 277 del, 1595 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead/wer_10_0.5
+# %WER 10.24 [ 2063 / 20138, 187 ins, 290 del, 1586 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_arpa/wer_10_0.5
+# %WER 10.29 [ 2072 / 20138, 228 ins, 242 del, 1602 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_arpa_fast/wer_9_0.5
+
+# Speed
+#
+# base 0.29 xRT
+# static 0.31 xRT
+# lookahead 0.77 xRT
+# arpa 1.03 xRT
+# arpa_fast 0.31 xRT
+
+# Graph size
+#
+# Base 461 Mb
+# Static 587 Mb
+# Lookahead 44 Mb HCL + 77 Mb Grammar
+# Lookahead + OpenGrm 44 Mb HCL + 42 Mb Grammar
+
+if [ ! -f "${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so" ]; then
+ echo "Missing ${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so"
+ echo "Make sure you compiled openfst with lookahead support. Run make in ${KALDI_ROOT}/tools after git pull."
+ exit 1
+fi
+if [ ! -f "${KALDI_ROOT}/tools/openfst/bin/ngramread" ]; then
+ echo "You appear to not have OpenGRM tools installed. Missing ${KALDI_ROOT}/tools/openfst/bin/ngramread"
+ echo "cd to $KALDI_ROOT/tools and run extras/install_opengrm.sh."
+ exit 1
+fi
+export LD_LIBRARY_PATH=${KALDI_ROOT}/tools/openfst/lib/fst
+
+# Baseline
+utils/format_lm.sh data/lang data/local/lm/lm_${lm}.arpa.gz \
+ data/local/dict/lexicon.txt data/lang_test_${lm}_base
+
+utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov \
+ data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead_base
+
+steps/nnet3/decode.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_base data/${testset}_hires ${am}/decode_${testset}_lookahead_base
+
+utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --remove-oov --compose-graph \
+ data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead
+
+# Decode with statically composed lookahead graph
+steps/nnet3/decode.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead_static
+
+# Decode with runtime composition
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead
+
+# Compile arpa graph
+utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --compose-graph \
+ data/lang_test_${lm}_base ${am} data/local/lm/lm_tgmed.arpa.gz ${am}/graph_${lm}_lookahead_arpa
+
+# Decode with runtime composition
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa
+
+# Decode with runtime composition and tuned beams
+steps/nnet3/decode_lookahead.sh --nj 20 \
+ --beam 12.0 --max-active 3000 \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
+ ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa_fast
diff --git a/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh b/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh
index 095e85cc338..4888de1f159 100755
--- a/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh
index f44b0cb0284..89d2a9f6e57 100755
--- a/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh
index c2f90df4b5c..817f5312a40 100755
--- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is a basic TDNN+LSTM nnet3 experiment.
diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh
index 2b3c2844972..49618686842 100755
--- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is like 1a, but adding dropout. It's definitely helpful,
# and you can see in the objf values that the train-test difference
diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh
index 5118cb0f8bd..9f5c1cd5e03 100755
--- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh
+++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1c is like 1b, but changing renorm to batchnorm and adding l2 regularization.
diff --git a/egs/mini_librispeech/s5/local/score.sh b/egs/mini_librispeech/s5/local/score.sh
index c812199fc98..cb5bbb7277b 100755
--- a/egs/mini_librispeech/s5/local/score.sh
+++ b/egs/mini_librispeech/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# 2014 Guoguo Chen
# Apache 2.0
diff --git a/egs/mini_librispeech/s5/local/subset_dataset.sh b/egs/mini_librispeech/s5/local/subset_dataset.sh
index 050128247a4..f8936b64c97 100755
--- a/egs/mini_librispeech/s5/local/subset_dataset.sh
+++ b/egs/mini_librispeech/s5/local/subset_dataset.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Luminar Technologies, Inc. (author: Daniel Galvez)
# Apache 2.0
diff --git a/egs/mini_librispeech/s5/run.sh b/egs/mini_librispeech/s5/run.sh
index 2a13668e2c2..257d88d4139 100755
--- a/egs/mini_librispeech/s5/run.sh
+++ b/egs/mini_librispeech/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Change this location to somewhere where you want to put the data.
data=./corpus/
@@ -196,7 +196,7 @@ fi
# Train a chain model
if [ $stage -le 9 ]; then
- local/chain/run_tdnn.sh
+ local/chain2/run_tdnn.sh
fi
# local/grammar/simple_demo.sh
diff --git a/egs/mobvoi/README.txt b/egs/mobvoi/README.txt
new file mode 100644
index 00000000000..a3400dd7f65
--- /dev/null
+++ b/egs/mobvoi/README.txt
@@ -0,0 +1,15 @@
+
+ The Mobvoi dataset is a ~67-hour wake word corpus
+ in Chinese covering 523 speakers. It is currently not publicly available.
+ The wake word is "Hi Xiaowen" (in Pinyin).
+ Each speaker's collection includes positive utterances and negative utterances
+ recorded with different speaker-to-microphone distance and different
+ signal-to-noise (SNR) ratio where noises are from typical home environments.
+ The dataset is provided by Mobvoi Inc.
+
+ The recipe is in v1/
+
+ The E2E LF-MMI recipe does not require any prior alignments for training
+ LF-MMI, making the alignment more flexible during training. It can be optionally
+ followed by a regular LF-MMI training to further improve the performance.
+
diff --git a/egs/mobvoi/v1/cmd.sh b/egs/mobvoi/v1/cmd.sh
new file mode 100644
index 00000000000..fc5d4aa9e1c
--- /dev/null
+++ b/egs/mobvoi/v1/cmd.sh
@@ -0,0 +1,24 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl"
+export decode_cmd="queue.pl --mem 4G"
+# the use of cuda_cmd is deprecated, used only in 'nnet1',
+export cuda_cmd="queue.pl --gpu 1"
+
+if [[ "$(hostname -f)" == *.fit.vutbr.cz ]]; then
+ queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+ export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+ export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+ export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
+
diff --git a/egs/mobvoi/v1/conf/mfcc.conf b/egs/mobvoi/v1/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/mobvoi/v1/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/mobvoi/v1/conf/mfcc_hires.conf b/egs/mobvoi/v1/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..d96b86ddfcb
--- /dev/null
+++ b/egs/mobvoi/v1/conf/mfcc_hires.conf
@@ -0,0 +1,9 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/mobvoi/v1/conf/online_cmvn.conf b/egs/mobvoi/v1/conf/online_cmvn.conf
new file mode 100644
index 00000000000..a173510e433
--- /dev/null
+++ b/egs/mobvoi/v1/conf/online_cmvn.conf
@@ -0,0 +1,3 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
+--norm-means=true
+--norm-vars=false
diff --git a/egs/mobvoi/v1/local/add_prefix_to_scp.py b/egs/mobvoi/v1/local/add_prefix_to_scp.py
new file mode 120000
index 00000000000..b6750c78e16
--- /dev/null
+++ b/egs/mobvoi/v1/local/add_prefix_to_scp.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/add_prefix_to_scp.py
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/chain/build_tree.sh b/egs/mobvoi/v1/local/chain/build_tree.sh
new file mode 100755
index 00000000000..452d844401d
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/build_tree.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey).
+# 2019 Yiming Wang
+# Apache 2.0.
+
+
+# This script is modified from steps/nnet3/chain/build_tree.sh, but only contains
+# trivial mono phone tree building without any states tying.
+
+
+# Begin configuration section.
+cmd=run.pl
+frame_subsampling_factor=1
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: $0 "
+ echo " e.g.: $0 --frame-subsampling-factor 3 \\"
+ echo " data/train data/lang_chain exp/mono_ali_train_sp exp/chain/tree"
+ echo "Main options (for others, see top of script file)"
+ echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
+ echo " --frame-subsampling-factor # Factor (e.g. 3) controlling frame subsampling"
+ echo " # at the neural net output, so the frame rate at"
+ echo " # the output is less than at the input."
+ exit 1;
+fi
+
+data=$1
+lang=$2
+alidir=$3
+dir=$4
+
+for f in $data/feats.scp $lang/phones.txt $alidir/final.mdl $alidir/ali.1.gz; do
+ [ ! -f $f ] && echo "train_sat.sh: no such file $f" && exit 1;
+done
+
+oov=`cat $lang/oov.int`
+nj=`cat $alidir/num_jobs` || exit 1;
+silphonelist=`cat $lang/phones/silence.csl`
+ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
+splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options.
+cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null`
+delta_opts=`cat $alidir/delta_opts 2>/dev/null`
+
+mkdir -p $dir/log
+cp $alidir/splice_opts $dir 2>/dev/null # frame-splicing options.
+cp $alidir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option.
+cp $alidir/delta_opts $dir 2>/dev/null # delta option.
+cp $alidir/ali.1.gz $dir 2>/dev/null # to pass the file checking later during training
+
+utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1;
+cp $lang/phones.txt $dir || exit 1;
+
+echo $nj >$dir/num_jobs
+if [ -f $alidir/per_utt ]; then
+ sdata=$data/split${nj}utt
+ utils/split_data.sh --per-utt $data $nj
+else
+ sdata=$data/split$nj
+ utils/split_data.sh $data $nj
+fi
+
+# Set up features.
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+## Set up speaker-independent features.
+case $feat_type in
+ delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |";;
+ lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+ cp $alidir/final.mat $dir
+ cp $alidir/full.mat $dir 2>/dev/null
+ ;;
+ *) echo "$0: invalid feature type $feat_type" && exit 1;
+esac
+
+# Add fMLLR transforms if available
+if [ -f $alidir/trans.1 ]; then
+ echo "$0: Using transforms from $alidir"
+ feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
+fi
+
+# Do subsampling of feats, if needed
+if [ $frame_subsampling_factor -gt 1 ]; then
+ feats="$feats subsample-feats --n=$frame_subsampling_factor ark:- ark:- |"
+fi
+
+echo "$0: Initializing monophone model (for alignment conversion, in case topology changed)"
+
+[ ! -f $lang/phones/sets.int ] && exit 1;
+shared_phones_opt="--shared-phones=$lang/phones/sets.int"
+# get feature dimension
+example_feats="`echo $feats | sed s/JOB/1/g`";
+if ! feat_dim=$(feat-to-dim "$example_feats" - 2>/dev/null) || [ -z $feat_dim ]; then
+ feat-to-dim "$example_feats" - # to see the error message.
+ echo "error getting feature dimension"
+ exit 1;
+fi
+$cmd JOB=1 $dir/log/init_mono.log \
+ gmm-init-mono $shared_phones_opt "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" $lang/topo $feat_dim \
+ $dir/mono.mdl $dir/mono.tree || exit 1;
+
+cp $dir/mono.mdl $dir/final.mdl
+cp $dir/mono.tree $dir/tree
+
+echo $0: Done building tree
diff --git a/egs/mobvoi/v1/local/chain/run_e2e_tdnn.sh b/egs/mobvoi/v1/local/chain/run_e2e_tdnn.sh
new file mode 120000
index 00000000000..891eec02423
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/run_e2e_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_e2e_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/chain/run_tdnn.sh b/egs/mobvoi/v1/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/chain/run_tdnn_e2eali.sh b/egs/mobvoi/v1/local/chain/run_tdnn_e2eali.sh
new file mode 120000
index 00000000000..38f0bd07e6c
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/run_tdnn_e2eali.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_e2eali_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/chain/tuning/run_e2e_tdnn_1a.sh b/egs/mobvoi/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
new file mode 100755
index 00000000000..99ce93ff28e
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
@@ -0,0 +1,239 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+set -e
+
+# configs for 'chain'
+stage=0
+train_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+minibatch_size=150=128,64/300=100,64,32/600=50,32,16/1200=16,8
+common_egs_dir=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_combined_spe2e
+test_sets="dev eval"
+export LC_ALL=en_US.UTF-8
+wake_word="嗨小问"
+export LC_ALL=C
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ mkdir -p $tree_dir
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat < $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word $id_word
+2 3 $id_sil $id_sil
+1 4 $id_freetext $id_freetext
+4 5 $id_sil $id_sil
+3 1.9
+5 0.7
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+ steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$train_cmd" \
+ data/${train_set}_hires $lang $tree_dir
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat < $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+
+ relu-batchnorm-dropout-layer name=tdnn1 input=Append(-2,-1,0,1,2) $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 3 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \
+ --cmd "$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.frame-subsampling-factor=3 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00003 \
+ --trainer.optimization.final-effective-lrate 0.000003 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch $minibatch_size \
+ --trainer.optimization.momentum=0.0 \
+ --egs.dir "$common_egs_dir" \
+ --egs.opts "--num-utts-subset 300 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --feat-dir data/${train_set}_hires \
+ --tree-dir $tree_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word_cost in 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id=`cat $lang_decode/words.txt | grep $wake_word | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat < $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.7
+4 0 $sil_id $sil_id
+1 2 $id $id $wake_word_cost
+2 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 200 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_cost$wake_word_cost || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ for data in $test_sets; do
+ echo "Results on $data set:"
+ cat ${dir}_online/decode_${data}_cost*/scoring_kaldi/all_results
+ done
+fi
diff --git a/egs/mobvoi/v1/local/chain/tuning/run_tdnn_1a.sh b/egs/mobvoi/v1/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..0b417f6541e
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,292 @@
+#!/bin/bash
+#
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+# Apache 2.0
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+nj=30
+gmm=mono
+train_stage=-5 # starting from -5 to skip phone-lm estimation
+get_egs_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+chunk_width=140,100,160
+common_egs_dir=
+reporting_email=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter
+combined_train_set=train_shorter_sp_combined
+test_sets="dev eval"
+aug_prefix="rev1 noise music babble"
+export LC_ALL=en_US.UTF-8
+wake_word="嗨小问"
+export LC_ALL=C
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 2 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom)
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 3 ]; then
+ local/copy_lat_dir.sh --nj 75 --cmd "$train_cmd" --utt-prefixes "$aug_prefix" \
+ $combined_train_data_dir $lat_dir $combined_lat_dir
+fi
+
+if [ $stage -le 4 ]; then
+ # Build a tree using our new topology. We know we have alignments from
+ # steps/align_fmllr.sh, so use those.
+ # The num-leaves is always somewhat less than the num-leaves from the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ local/chain/build_tree.sh \
+ --frame-subsampling-factor 3 \
+ --cmd "$train_cmd" ${lores_train_data_dir} \
+ $lang $ali_dir $tree_dir
+
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+1 2 $id_word $id_word
+2 3 $id_sil $id_sil
+1 4 $id_freetext $id_freetext
+4 5 $id_sil $id_sil
+3 1.9
+5 0.7
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 6 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ cp $tree_dir/phone_lm.fst $dir/phone_lm.fst
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00005 \
+ --trainer.optimization.final-effective-lrate 0.000005 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.0 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=0 \
+ --egs.chunk-right-context=0 \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir $combined_train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$combined_lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word_cost in -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id=`cat $lang_decode/words.txt | grep $wake_word | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.7
+4 0 $sil_id $sil_id
+1 2 $id $id $wake_word_cost
+2 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 200 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_cost$wake_word_cost || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ for data in $test_sets; do
+ echo "Results on $data set:"
+ cat ${dir}_online/decode_${data}_cost*/scoring_kaldi/all_results
+ done
+fi
diff --git a/egs/mobvoi/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh b/egs/mobvoi/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
new file mode 100755
index 00000000000..eb806b76850
--- /dev/null
+++ b/egs/mobvoi/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
@@ -0,0 +1,282 @@
+#!/bin/bash
+#
+# Copyright 2019-2020 Daniel Povey
+# 2019-2020 Yiming Wang
+# Apache 2.0
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+nj=30
+e2echain_model_dir=exp/chain/e2e_tdnn_1a
+train_stage=-5 # starting from -5 to skip phone-lm estimation
+get_egs_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+chunk_width=140,100,160
+common_egs_dir=
+reporting_email=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_sp_combined
+test_sets="dev eval"
+export LC_ALL=en_US.UTF-8
+wake_word="嗨小问"
+export LC_ALL=C
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 2 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom)
+ # use the same num-jobs as the alignments
+ steps/nnet3/align_lats.sh --nj 75 --cmd "$train_cmd" \
+ --acoustic-scale 1.0 \
+ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \
+ $train_data_dir data/lang $e2echain_model_dir $lat_dir
+ echo "" >$lat_dir/splice_opts
+fi
+
+if [ $stage -le 3 ]; then
+ # Build a tree using our new topology. We know we have alignments from
+ # steps/align_fmllr.sh, so use those.
+ # The num-leaves is always somewhat less than the num-leaves from the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ local/chain/build_tree.sh \
+ --frame-subsampling-factor 3 --cmd "$train_cmd" \
+ $train_data_dir $lang $ali_dir $tree_dir
+
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+1 2 $id_word $id_word
+2 3 $id_sil $id_sil
+1 4 $id_freetext $id_freetext
+4 5 $id_sil $id_sil
+3 1.9
+5 0.7
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 6 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ cp $tree_dir/phone_lm.fst $dir/phone_lm.fst
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.alignment-subsampling-factor=1 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00005 \
+ --trainer.optimization.final-effective-lrate 0.000005 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.0 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=0 \
+ --egs.chunk-right-context=0 \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir $train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word_cost in 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id=`cat $lang_decode/words.txt | grep $wake_word | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.7
+4 0 $sil_id $sil_id
+1 2 $id $id $wake_word_cost
+2 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 200 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_cost$wake_word_cost || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ for data in $test_sets; do
+ echo "Results on $data set:"
+ cat ${dir}_online/decode_${data}_cost*/scoring_kaldi/all_results
+ done
+fi
diff --git a/egs/mobvoi/v1/local/compute_metrics.py b/egs/mobvoi/v1/local/compute_metrics.py
new file mode 120000
index 00000000000..695a2ca5f6d
--- /dev/null
+++ b/egs/mobvoi/v1/local/compute_metrics.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/compute_metrics.py
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/copy_lat_dir.sh b/egs/mobvoi/v1/local/copy_lat_dir.sh
new file mode 120000
index 00000000000..6be684730ad
--- /dev/null
+++ b/egs/mobvoi/v1/local/copy_lat_dir.sh
@@ -0,0 +1 @@
+../../../../scripts/wakeword/copy_lat_dir.sh
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/gen_topo.pl b/egs/mobvoi/v1/local/gen_topo.pl
new file mode 120000
index 00000000000..fd5959cebaf
--- /dev/null
+++ b/egs/mobvoi/v1/local/gen_topo.pl
@@ -0,0 +1 @@
+../../../../scripts/wakeword/gen_topo.pl
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/get_random_subsegments.py b/egs/mobvoi/v1/local/get_random_subsegments.py
new file mode 120000
index 00000000000..24631471ff6
--- /dev/null
+++ b/egs/mobvoi/v1/local/get_random_subsegments.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/get_random_subsegments.py
\ No newline at end of file
diff --git a/egs/mobvoi/v1/local/mobvoi_data_download.sh b/egs/mobvoi/v1/local/mobvoi_data_download.sh
new file mode 100755
index 00000000000..f0e7d961be2
--- /dev/null
+++ b/egs/mobvoi/v1/local/mobvoi_data_download.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+# This script loads the Mobvoi dataset.
+[ -f ./path.sh ] && . ./path.sh
+
+dl_dir=data/download
+
+mkdir -p $dl_dir
+
+src_path=/export/fs04/a11/hlyu/wakeup_word_corpra/mobvoi
+
+dataset=ticmini2_dataset_20180607.zip
+if [ -d $dl_dir/$(basename "$dataset" .zip) ]; then
+ echo "Not extracting $(basename "$dataset" .zip) as it is already there."
+else
+ if [ ! -f $dl_dir/$dataset ]; then
+ echo "Downloading $dataset..."
+ cat $src_path/ticmini2_dataset_20180607.z01 $src_path/$dataset > $dl_dir/$dataset
+ fi
+ unzip $dl_dir/$dataset -d $dl_dir
+ rm -f $dl_dir/$dataset 2>/dev/null || true
+ echo "Done extracting $dataset."
+fi
+
+dataset=ticmini2_for_school_20180911.tar.gz
+if [ -d $dl_dir/$(basename "$dataset" .tar.gz) ]; then
+ echo "Not extracting $(basename "$dataset" .tar.gz) as it is already there."
+else
+ echo "Extracting $dataset..."
+ tar -xvzf $src_path/$dataset -C $dl_dir || exit 1;
+ echo "Done extracting $dataset."
+fi
+
+dataset=ticmini2_hixiaowen_adult_20180731.7z
+if [ -d $dl_dir/$(basename "$dataset" .7z) ]; then
+ echo "Not extracting $(basename "$dataset" .7z) as it is already there."
+else
+ echo "Extracting $dataset..."
+ ~/p7zip_16.02/bin/7z x $src_path/$dataset -o$dl_dir|| exit 1;
+ echo "Done extracting $dataset."
+fi
+
+for dataset in train dev eval; do
+ cp $src_path/${dataset}_list $dl_dir/${dataset}_list
+done
+
+exit 0
diff --git a/egs/mobvoi/v1/local/prepare_dict.sh b/egs/mobvoi/v1/local/prepare_dict.sh
new file mode 100755
index 00000000000..96946c83cd7
--- /dev/null
+++ b/egs/mobvoi/v1/local/prepare_dict.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+
+set -e
+dir=data/local/dict
+
+. ./utils/parse_options.sh
+
+mkdir -p $dir
+
+# First get the set of all letters that occur in data/train/text
+echo "hixiaowen" > $dir/nonsilence_phones.txt
+echo "freetext" >> $dir/nonsilence_phones.txt
+
+echo "嗨小问 hixiaowen" > $dir/lexicon.txt
+echo "FREETEXT freetext" >> $dir/lexicon.txt
+echo "<sil> SIL" >> $dir/lexicon.txt
+
+echo SIL > $dir/silence_phones.txt
+
+echo SIL >$dir/optional_silence.txt
+
+echo -n "" >$dir/extra_questions.txt
diff --git a/egs/mobvoi/v1/local/prepare_wav.py b/egs/mobvoi/v1/local/prepare_wav.py
new file mode 100755
index 00000000000..5e42f64ba9f
--- /dev/null
+++ b/egs/mobvoi/v1/local/prepare_wav.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+""" This script prepares the Mobvoi data into kaldi format.
+"""
+
+
+import argparse
+import os
+import sys
+import glob
+
+def main():
+ parser = argparse.ArgumentParser(description="""Generates {train|dev|eval}_wav.scp files.""")
+ parser.add_argument('dir', type=str,
+ default='data',
+ help='path to the directory containing downloaded dataset')
+ args = parser.parse_args()
+
+ assert os.path.isdir(args.dir)
+ with open(os.path.join(args.dir, "train", "text"), 'r', encoding='utf-8') as f:
+ lines = f.readlines()
+ train_set = set([os.path.splitext(os.path.split(line.strip().split()[0])[1])[0] for line in lines])
+ assert len(train_set) > 0
+ with open(os.path.join(args.dir, "dev", "text"), 'r', encoding='utf-8') as f:
+ lines = f.readlines()
+ dev_set = set([os.path.splitext(os.path.split(line.strip().split()[0])[1])[0] for line in lines])
+ assert len(dev_set) > 0
+ with open(os.path.join(args.dir, "eval", "text"), 'r', encoding='utf-8') as f:
+ lines = f.readlines()
+ eval_set = set([os.path.splitext(os.path.split(line.strip().split()[0])[1])[0] for line in lines])
+ assert len(eval_set) > 0
+ assert len(train_set.intersection(dev_set)) == 0
+ assert len(train_set.intersection(eval_set)) == 0
+ assert len(dev_set.intersection(eval_set)) == 0
+
+ train_wav_scp = open(os.path.join(args.dir, "train", "wav.scp"), 'w', encoding='utf-8')
+ dev_wav_scp = open(os.path.join(args.dir, "dev", "wav.scp"), 'w', encoding='utf-8')
+ eval_wav_scp = open(os.path.join(args.dir, "eval", "wav.scp"), 'w', encoding='utf-8')
+
+ # Look through all the subfolders to find audio samples
+ wav_files = {}
+ search_path = os.path.join(args.dir, '**', '*.wav')
+ for wav_path in glob.glob(search_path, recursive=True):
+ _, basename = os.path.split(wav_path)
+ utt_id = os.path.splitext(basename)[0]
+ extended_wav_path = "sox " + os.path.abspath(wav_path) + " -t wav - |"
+ if not utt_id in wav_files:
+ wav_files[utt_id] = extended_wav_path
+ for utt_id in train_set:
+ train_wav_scp.write(utt_id + " " + wav_files[utt_id] + "\n")
+ for utt_id in dev_set:
+ dev_wav_scp.write(utt_id + " " + wav_files[utt_id] + "\n")
+ for utt_id in eval_set:
+ eval_wav_scp.write(utt_id + " " + wav_files[utt_id] + "\n")
+
+ train_wav_scp.close()
+ dev_wav_scp.close()
+ eval_wav_scp.close()
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/mobvoi/v1/local/score_online.sh b/egs/mobvoi/v1/local/score_online.sh
new file mode 100755
index 00000000000..c3a7c60eb46
--- /dev/null
+++ b/egs/mobvoi/v1/local/score_online.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Copyright 2018-2019 Daniel Povey
+# 2018-2020 Yiming Wang
+# Apache 2.0
+
+[ -f ./path.sh ] && . ./path.sh
+
+# begin configuration section.
+wake_word="嗨小问"
+#end configuration section.
+
+echo "$0 $@" # Print the command line for logging
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+ echo "Usage: $0 <data-dir> <lang-dir|graph-dir> <decode-dir>"
+ echo " Options:"
+ exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $data/text; do
+ [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
+done
+
+
+utils/data/get_utt2dur.sh $data
+rm $data/utt2dur_negative 2>/dev/null || true
+utils/filter_scp.pl <(grep -v $wake_word $data/text) $data/utt2dur > $data/utt2dur_negative && dur=`awk '{a+=$2} END{print a}' $data/utt2dur_negative`
+echo "total duration (in seconds) of negative examples in $data: $dur"
+
+ref_filtering_cmd="cat"
+[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter"
+[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter"
+hyp_filtering_cmd="cat"
+[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter"
+[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter"
+
+
+mkdir -p $dir/scoring_kaldi
+cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1;
+cat $dir/trans.txt | utils/int2sym.pl -f 2- $symtab | $hyp_filtering_cmd > $dir/scoring_kaldi/hyp_filt.txt || exit 1;
+export LC_ALL=en_US.UTF-8
+cat $dir/scoring_kaldi/hyp_filt.txt | \
+local/compute_metrics.py $dir/scoring_kaldi/test_filt.txt - --wake-word $wake_word \
+ --duration $dur > $dir/scoring_kaldi/all_results
+export LC_ALL=C
+
+exit 0;
diff --git a/egs/mobvoi/v1/local/split_datasets.sh b/egs/mobvoi/v1/local/split_datasets.sh
new file mode 100755
index 00000000000..a403523622c
--- /dev/null
+++ b/egs/mobvoi/v1/local/split_datasets.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+stage=0
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -eu
+
+
+if [ $stage -le 1 ]; then
+ dir=data/download/ticmini2_dataset_20180607
+ trans=$dir/hixiaowen.txt
+ paste -d ' ' <(cat $trans | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/hixiaowen_text || exit 1
+ dir=data/download/ticmini2_hixiaowen_adult_20180731
+ for folder in patch1 patch2; do
+ trans=$dir/$folder/hixiaowen_trans
+ paste -d ' ' <(cat $trans | awk '{split($1,a,"."); print a[1]}' | awk '{split($1,a,"/"); print a[3]}') <(cat $trans | cut -d ' ' -f2-) || exit 1
+ done > $dir/hixiaowen_text || exit 1
+ dir=data/download/ticmini2_for_school_20180911
+ trans=$dir/hixiaowen/hixiaowen.trans
+ paste -d ' ' <(cat $trans | awk '{split($1,a,"/"); print a[4]}' | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/hixiaowen_text || exit 1
+ for dataset in ticmini2_dataset_20180607 ticmini2_hixiaowen_adult_20180731 ticmini2_for_school_20180911; do
+ cat data/download/$dataset/hixiaowen_text || exit 1
+ done | sort -u -k1,1 > data/hixiaowen_text || exit 1
+fi
+
+if [ $stage -le 2 ]; then
+ dir=data/download/ticmini2_dataset_20180607
+ trans=$dir/freetext.txt
+ paste -d ' ' <(cat $trans | awk '{print $1}' | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/freetext_text || exit 1
+ dir=data/download/ticmini2_for_school_20180911
+ trans=$dir/freetext/freetext.trans
+ paste -d ' ' <(cat $trans | awk '{print $1}' | awk '{split($1,a,"/"); print a[4]}' | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/freetext_text || exit 1
+ for dataset in ticmini2_dataset_20180607 ticmini2_for_school_20180911; do
+ cat data/download/$dataset/freetext_text || exit 1
+ done | sort -u -k1,1 > data/freetext_text || exit 1
+fi
+
+if [ $stage -le 3 ]; then
+ dir=data/download/ticmini2_dataset_20180607
+ trans=$dir/garbage.txt
+ paste -d ' ' <(cat $trans | awk '{print $1}' | awk '{split($1,a,"."); print a[1]}') <(cat $trans | cut -d ' ' -f2-) > $dir/garbage_text || exit 1
+ cat $dir/garbage_text > data/garbage_text
+fi
+
+if [ $stage -le 4 ]; then
+ cat data/hixiaowen_text data/freetext_text data/garbage_text > data/text
+ cat data/hixiaowen_text data/freetext_text | awk '{print $1}' | awk '{split($1,a,"-"); print $1,a[1]}' > data/hixiaowen_freetext_utt2spk || exit 1
+ cat data/garbage_text | awk '{print $1}' | awk '{split($1,a,"_"); if(a[1]=="garbage") print $1,a[1] "_" a[2] "_" a[3]; else if(a[1]=="ticmini" || a[1]=="timini") print $1,a[1] "_" a[2] "_" a[3] "_" a[4] "_" a[5]; else print $1,$1}' | cat data/hixiaowen_freetext_utt2spk - > data/utt2spk || exit 1
+ rm -f data/hixiaowen_freetext_utt2spk 2>/dev/null || true
+fi
+
+if [ $stage -le 5 ]; then
+ for folder in train dev eval; do
+ mkdir -p data/$folder
+ utils/filter_scp.pl data/download/${folder}_list data/text > data/$folder/text || exit 1
+ utils/filter_scp.pl data/download/${folder}_list data/utt2spk > data/$folder/utt2spk || exit 1
+ done
+fi
+
+exit 0
diff --git a/egs/mobvoi/v1/local/wer_output_filter b/egs/mobvoi/v1/local/wer_output_filter
new file mode 100755
index 00000000000..bb4de1d1572
--- /dev/null
+++ b/egs/mobvoi/v1/local/wer_output_filter
@@ -0,0 +1,24 @@
+#!/usr/bin/env perl
+# Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+use utf8;
+
+use open qw(:encoding(utf8));
+binmode STDIN, ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+while (<>) {
+ @F = split " ";
+ print $F[0] . " ";
+ foreach $s (@F[1..$#F]) {
+ if ($s =~ /\<.*\>/) {
+ print "";
+ } else {
+ print "$s "
+ }
+ }
+ print "\n";
+}
+
+
diff --git a/egs/mobvoi/v1/path.sh b/egs/mobvoi/v1/path.sh
new file mode 100755
index 00000000000..2d17b17a84a
--- /dev/null
+++ b/egs/mobvoi/v1/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/mobvoi/v1/run.sh b/egs/mobvoi/v1/run.sh
new file mode 100755
index 00000000000..994f7217848
--- /dev/null
+++ b/egs/mobvoi/v1/run.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+stage=0
+
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -euo pipefail
+
+if [ $stage -le 0 ]; then
+ local/mobvoi_data_download.sh
+ echo "$0: Extracted all datasets into data/download/"
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Splitting datasets..."
+ local/split_datasets.sh
+ echo "$0: text and utt2spk have been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: Preparing wav.scp..."
+ local/prepare_wav.py data
+ echo "wav.scp has been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Extracting MFCC..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ utils/fix_data_dir.sh $dir
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 16 $dir
+ steps/compute_cmvn_stats.sh $dir
+ utils/fix_data_dir.sh $dir
+ utils/data/get_utt2dur.sh $dir
+ utils/validate_data_dir.sh $dir
+ done
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: Post processing transcripts..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ cat $dir/text | awk '{if ($2=="嗨小问" || $2=="嗨小问嗨小问") {print $1,"嗨小问";} else {print $1,"FREETEXT"}}' > $dir/text.tmp || exit 1
+ mv $dir/text.tmp $dir/text || exit 1
+ done
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: Preparing dictionary and lang..."
+ local/prepare_dict.sh
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.5 \
+ --position-dependent-phones false \
+    data/local/dict "<sil>" data/lang/temp data/lang
+fi
+
+if [ $stage -le 6 ]; then
+  id_sil=`cat data/lang/words.txt | grep "<sil>" | awk '{print $2}'`
+ id_freetext=`cat data/lang/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ export LC_ALL=en_US.UTF-8
+ id_word=`cat data/lang/words.txt | grep "嗨小问" | awk '{print $2}'`
+ export LC_ALL=C
+ mkdir -p data/lang/lm
+  cat <<EOF > data/lang/lm/fst.txt
+0 1 $id_sil $id_sil
+0 4 $id_sil $id_sil 7.0
+1 4 $id_freetext $id_freetext 0.0
+4 0 $id_sil $id_sil
+1 2 $id_word $id_word 1.1
+2 0 $id_sil $id_sil
+0
+EOF
+ fstcompile data/lang/lm/fst.txt data/lang/G.fst
+ set +e
+ fstisstochastic data/lang/G.fst
+ set -e
+ utils/validate_lang.pl data/lang
+fi
+
+if [ $stage -le 7 ]; then
+ echo "$0: subsegmenting for the training data..."
+ srcdir=data/train
+ utils/data/convert_data_dir_to_whole.sh $srcdir ${srcdir}_whole
+
+ utils/data/get_segments_for_data.sh $srcdir > ${srcdir}_whole/segments
+ utils/filter_scp.pl <(awk '{if ($2 == "FREETEXT") print $1}' ${srcdir}_whole/text) \
+ ${srcdir}_whole/segments >${srcdir}_whole/neg_segments
+ utils/filter_scp.pl --exclude ${srcdir}_whole/neg_segments ${srcdir}_whole/segments \
+ >${srcdir}_whole/pos_segments
+ utils/filter_scp.pl ${srcdir}_whole/pos_segments ${srcdir}_whole/utt2dur >${srcdir}_whole/pos_utt2dur
+ local/get_random_subsegments.py --overlap-duration=0.3 --max-remaining-duration=0.3 \
+ ${srcdir}_whole/neg_segments ${srcdir}_whole/pos_utt2dur | \
+ cat ${srcdir}_whole/pos_segments - | sort >${srcdir}_whole/sub_segments
+ utils/data/subsegment_data_dir.sh ${srcdir}_whole \
+ ${srcdir}_whole/sub_segments data/train_segmented
+ awk '{print $1,$2}' ${srcdir}_whole/sub_segments | \
+ utils/apply_map.pl -f 2 ${srcdir}_whole/text >data/train_segmented/text
+ utils/data/extract_wav_segments_data_dir.sh --nj 50 --cmd "$train_cmd" \
+ data/train_segmented data/train_shorter
+ steps/compute_cmvn_stats.sh data/train_shorter
+ utils/fix_data_dir.sh data/train_shorter
+ utils/validate_data_dir.sh data/train_shorter
+fi
+
+# In this section, we augment the training data with reverberation,
+# noise, music, and babble, and combined it with the clean data.
+if [ $stage -le 8 ]; then
+ utils/data/get_utt2dur.sh data/train_shorter
+  cp data/train_shorter/utt2dur data/train_shorter/reco2dur
+ # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+ [ ! -f rirs_noises.zip ] && wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+ [ ! -d "RIRS_NOISES" ] && unzip rirs_noises.zip
+
+ # Make a version with reverberated speech
+ rvb_opts=()
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+
+ # Make a reverberated version of the SWBD+SRE list. Note that we don't add any
+ # additive noise here.
+ steps/data/reverberate_data_dir.py \
+ "${rvb_opts[@]}" \
+ --speech-rvb-probability 1 \
+ --prefix "rev" \
+ --pointsource-noise-addition-probability 0 \
+ --isotropic-noise-addition-probability 0 \
+ --num-replications 1 \
+ --source-sampling-rate 16000 \
+ data/train_shorter data/train_shorter_reverb
+ cat data/train_shorter/utt2dur | awk -v name=rev1 '{print name"-"$0}' >data/train_shorter_reverb/utt2dur
+
+ # Prepare the MUSAN corpus, which consists of music, speech, and noise
+ # suitable for augmentation.
+ steps/data/make_musan.sh /export/corpora/JHU/musan data
+
+ # Get the duration of the MUSAN recordings. This will be used by the
+ # script augment_data_dir.py.
+ for name in speech noise music; do
+ utils/data/get_utt2dur.sh data/musan_${name}
+ cp data/musan_${name}/utt2dur data/musan_${name}/reco2dur
+ done
+
+ # Augment with musan_noise
+ export LC_ALL=en_US.UTF-8
+ steps/data/augment_data_dir.py --utt-prefix "noise" --modify-spk-id true --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_shorter data/train_shorter_noise
+ # Augment with musan_music
+ steps/data/augment_data_dir.py --utt-prefix "music" --modify-spk-id true --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_shorter data/train_shorter_music
+ # Augment with musan_speech
+ steps/data/augment_data_dir.py --utt-prefix "babble" --modify-spk-id true --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_shorter data/train_shorter_babble
+ export LC_ALL=C
+fi
+
+if [ $stage -le 9 ]; then
+ # Now make MFCC features
+ for name in reverb noise music babble; do
+ steps/make_mfcc.sh --nj 16 --cmd "$train_cmd" \
+ data/train_shorter_${name} || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}
+ utils/fix_data_dir.sh data/train_shorter_${name}
+ utils/validate_data_dir.sh data/train_shorter_${name}
+ done
+fi
+
+# monophone training
+if [ $stage -le 10 ]; then
+ steps/train_mono.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter data/lang exp/mono
+ (
+ utils/mkgraph.sh data/lang \
+ exp/mono exp/mono/graph
+ )&
+
+ steps/align_si.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter data/lang exp/mono exp/mono_ali_train_shorter
+fi
+
+if [ $stage -le 11 ]; then
+ echo "$0: preparing for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/train_shorter data/train_shorter_sp
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 data/train_shorter_sp || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_sp || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_sp
+fi
+
+if [ $stage -le 12 ]; then
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter_sp data/lang exp/mono exp/mono_ali_train_shorter_sp || exit 1
+fi
+
+if [ $stage -le 13 ]; then
+ echo "$0: creating high-resolution MFCC features"
+ mfccdir=data/train_shorter_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in train_shorter_sp dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/train_shorter_sp_hires || exit 1;
+
+ for datadir in train_shorter_sp dev eval; do
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+fi
+
+combined_train_set=train_shorter_sp_combined
+aug_affix="reverb noise music babble"
+if [ $stage -le 14 ]; then
+ for name in $aug_affix; do
+ echo "$0: creating high-resolution MFCC features for train_shorter_${name}"
+ mfccdir=data/train_shorter_${name}_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ utils/copy_data_dir.sh data/train_shorter_${name} data/train_shorter_${name}_hires
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_${name}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_${name}_hires || exit 1;
+ done
+ eval utils/combine_data.sh data/${combined_train_set}_hires data/train_shorter_sp_hires \
+ data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}_hires
+fi
+
+
+if [ $stage -le 15 ]; then
+ local/chain/run_tdnn.sh --train-set train_shorter --combined-train-set ${combined_train_set}
+fi
+
+exit 0
+
diff --git a/egs/mobvoi/v1/run_e2e.sh b/egs/mobvoi/v1/run_e2e.sh
new file mode 100755
index 00000000000..7976a3a7593
--- /dev/null
+++ b/egs/mobvoi/v1/run_e2e.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+# This recipe uses E2E LF-MMI training which doesn't require GMM training to obtain alignments.
+# Its performance is slightly better than those based on alignments (cross-entropy or regular LF-MMI)
+# on this dataset.
+
+stage=0
+
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -euo pipefail
+
+if [ $stage -le 0 ]; then
+ local/mobvoi_data_download.sh
+ echo "$0: Extracted all datasets into data/download/"
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Splitting datasets..."
+ local/split_datasets.sh
+ echo "$0: text and utt2spk have been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: Preparing wav.scp..."
+ local/prepare_wav.py data
+ echo "wav.scp has been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Extracting MFCC..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ utils/fix_data_dir.sh $dir
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 16 $dir
+ steps/compute_cmvn_stats.sh $dir
+ utils/fix_data_dir.sh $dir
+ utils/data/get_utt2dur.sh $dir
+ utils/validate_data_dir.sh $dir
+ done
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: Post processing transcripts..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ export LC_ALL=en_US.UTF-8
+ cat $dir/text | awk '{if ($2=="嗨小问" || $2=="嗨小问嗨小问") {print $1,"嗨小问";} else {print $1,"FREETEXT"}}' > $dir/text.tmp || exit 1
+ export LC_ALL=C
+ cat $dir/text.tmp > $dir/text || exit 1
+ rm -f $dir/text.tmp 2>/dev/null || true
+ done
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: Preparing dictionary and lang..."
+ local/prepare_dict.sh
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.5 \
+ --position-dependent-phones false \
+    data/local/dict "<sil>" data/lang/temp data/lang
+fi
+
+if [ $stage -le 6 ]; then
+  id_sil=`cat data/lang/words.txt | grep "<sil>" | awk '{print $2}'`
+ id_freetext=`cat data/lang/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ export LC_ALL=en_US.UTF-8
+ id_word=`cat data/lang/words.txt | grep "嗨小问" | awk '{print $2}'`
+ export LC_ALL=C
+ mkdir -p data/lang/lm
+  cat <<EOF > data/lang/lm/fst.txt
+0 1 $id_sil $id_sil
+0 4 $id_sil $id_sil 7.0
+1 4 $id_freetext $id_freetext 0.0
+4 0 $id_sil $id_sil
+1 2 $id_word $id_word 1.1
+2 0 $id_sil $id_sil
+0
+EOF
+ fstcompile data/lang/lm/fst.txt data/lang/G.fst
+ set +e
+ fstisstochastic data/lang/G.fst
+ set -e
+ utils/validate_lang.pl data/lang
+fi
+
+if [ $stage -le 7 ]; then
+ echo "$0: subsegmenting for the training data..."
+ srcdir=data/train
+ utils/data/convert_data_dir_to_whole.sh $srcdir ${srcdir}_whole
+
+ utils/data/get_segments_for_data.sh $srcdir > ${srcdir}_whole/segments
+ utils/filter_scp.pl <(awk '{if ($2 == "FREETEXT") print $1}' ${srcdir}_whole/text) \
+ ${srcdir}_whole/segments >${srcdir}_whole/neg_segments
+ utils/filter_scp.pl --exclude ${srcdir}_whole/neg_segments ${srcdir}_whole/segments \
+ >${srcdir}_whole/pos_segments
+ utils/filter_scp.pl ${srcdir}_whole/pos_segments ${srcdir}_whole/utt2dur >${srcdir}_whole/pos_utt2dur
+ local/get_random_subsegments.py --overlap-duration=0.3 --max-remaining-duration=0.3 \
+ ${srcdir}_whole/neg_segments ${srcdir}_whole/pos_utt2dur | \
+ cat ${srcdir}_whole/pos_segments - | sort >${srcdir}_whole/sub_segments
+ utils/data/subsegment_data_dir.sh ${srcdir}_whole \
+ ${srcdir}_whole/sub_segments data/train_segmented
+ awk '{print $1,$2}' ${srcdir}_whole/sub_segments | \
+ utils/apply_map.pl -f 2 ${srcdir}_whole/text >data/train_segmented/text
+ utils/data/extract_wav_segments_data_dir.sh --nj 50 --cmd "$train_cmd" \
+ data/train_segmented data/train_shorter
+ steps/compute_cmvn_stats.sh data/train_shorter
+ utils/fix_data_dir.sh data/train_shorter
+ utils/validate_data_dir.sh data/train_shorter
+fi
+
+# In this section, we augment the training data with reverberation,
+# noise, music, and babble, and combined it with the clean data.
+if [ $stage -le 8 ]; then
+ utils/data/get_utt2dur.sh data/train_shorter
+ cp data/train_shorter/utt2dur data/train_shorter/reco2dur
+ # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+ [ ! -f rirs_noises.zip ] && wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+ [ ! -d "RIRS_NOISES" ] && unzip rirs_noises.zip
+
+ # Make a version with reverberated speech
+ rvb_opts=()
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+
+ # Make a reverberated version of the SWBD+SRE list. Note that we don't add any
+ # additive noise here.
+ steps/data/reverberate_data_dir.py \
+ "${rvb_opts[@]}" \
+ --speech-rvb-probability 1 \
+ --prefix "rev" \
+ --pointsource-noise-addition-probability 0 \
+ --isotropic-noise-addition-probability 0 \
+ --num-replications 1 \
+ --source-sampling-rate 16000 \
+ data/train_shorter data/train_shorter_reverb
+ cat data/train_shorter/utt2dur | awk -v name=rev1 '{print name"-"$0}' >data/train_shorter_reverb/utt2dur
+
+ # Prepare the MUSAN corpus, which consists of music, speech, and noise
+ # suitable for augmentation.
+ steps/data/make_musan.sh /export/corpora/JHU/musan data
+
+ # Get the duration of the MUSAN recordings. This will be used by the
+ # script augment_data_dir.py.
+ for name in speech noise music; do
+ utils/data/get_utt2dur.sh data/musan_${name}
+ cp data/musan_${name}/utt2dur data/musan_${name}/reco2dur
+ done
+
+ # Augment with musan_noise
+ export LC_ALL=en_US.UTF-8
+ steps/data/augment_data_dir.py --utt-prefix "noise" --modify-spk-id true --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_shorter data/train_shorter_noise
+ # Augment with musan_music
+ steps/data/augment_data_dir.py --utt-prefix "music" --modify-spk-id true --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_shorter data/train_shorter_music
+ # Augment with musan_speech
+ steps/data/augment_data_dir.py --utt-prefix "babble" --modify-spk-id true --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_shorter data/train_shorter_babble
+ export LC_ALL=C
+fi
+
+if [ $stage -le 9 ]; then
+ # Now make MFCC features
+ for name in reverb noise music babble; do
+ steps/make_mfcc.sh --nj 16 --cmd "$train_cmd" \
+ data/train_shorter_${name} || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}
+ utils/fix_data_dir.sh data/train_shorter_${name}
+ utils/validate_data_dir.sh data/train_shorter_${name}
+ done
+fi
+
+combined_train_set=train_shorter_combined
+aug_affix="reverb noise music babble"
+if [ $stage -le 10 ]; then
+ aug_affix="reverb noise music babble"
+ eval utils/combine_data.sh data/${combined_train_set} data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}
+fi
+
+if [ -f data/${combined_train_set}_spe2e_hires/feats.scp ]; then
+ echo "$0: It seems that features for the perturbed training data already exist."
+ echo "If you want to extract them anyway, remove them first and run this"
+ echo "stage again. Skipping this stage..."
+else
+ if [ $stage -le 11 ]; then
+ echo "$0: perturbing the training data to allowed lengths..."
+ utils/data/get_utt2dur.sh data/${combined_train_set} # necessary for the next command
+
+ # 12 in the following command means the allowed lengths are spaced
+ # by 12% change in length.
+ utils/data/perturb_speed_to_allowed_lengths.py --speed-perturb false 12 data/${combined_train_set} \
+ data/${combined_train_set}_e2e_hires
+ cat data/${combined_train_set}_e2e_hires/utt2dur | \
+ awk '{print $1 " " substr($1,5)}' >data/${combined_train_set}_e2e_hires/utt2uniq.tmp
+ utils/apply_map.pl -f 2 data/${combined_train_set}/utt2uniq \
+ data/${combined_train_set}_e2e_hires/utt2uniq
+ rm -f data/${combined_train_set}_e2e_hires/utt2uniq.tmp 2>/dev/null || true
+ utils/fix_data_dir.sh data/${combined_train_set}_e2e_hires
+
+ utils/data/get_utt2dur.sh data/train_shorter # necessary for the next command
+ utils/data/perturb_speed_to_allowed_lengths.py 12 data/train_shorter data/train_shorter_spe2e_hires
+ cat data/train_shorter_spe2e_hires/utt2dur | \
+ awk '{print $1 " " substr($1,5)}' >data/train_shorter_spe2e_hires/utt2uniq
+ utils/fix_data_dir.sh data/train_shorter_spe2e_hires
+ utils/combine_data.sh data/${combined_train_set}_spe2e_hires data/${combined_train_set}_e2e_hires data/train_shorter_spe2e_hires
+ cat data/train_shorter_spe2e_hires/allowed_lengths.txt >data/${combined_train_set}_spe2e_hires/allowed_lengths.txt
+ fi
+
+ if [ $stage -le 12 ]; then
+ echo "$0: extracting MFCC features for the training data..."
+ mfccdir=data/${combined_train_set}_spe2e_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" \
+ data/${combined_train_set}_spe2e_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${combined_train_set}_spe2e_hires || exit 1;
+ utils/fix_data_dir.sh data/${combined_train_set}_spe2e_hires
+ utils/validate_data_dir.sh data/${combined_train_set}_spe2e_hires
+ fi
+fi
+
+if [ $stage -le 13 ]; then
+ if [ -f data/eval_hires/feats.scp ]; then
+ echo "$0: It seems that features for the test sets already exist."
+ echo "skipping this stage..."
+ else
+ echo "$0: extracting MFCC features for the test sets"
+ for datadir in dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+ fi
+fi
+
+if [ $stage -le 14 ]; then
+ local/chain/run_e2e_tdnn.sh --train-set ${combined_train_set}_spe2e
+fi
+
+combined_train_set=train_shorter_sp_combined
+if [ -f data/${combined_train_set}_hires/feats.scp ]; then
+ echo "$0: It seems that features for the perturbed training data already exist."
+ echo "If you want to extract them anyway, remove them first and run this"
+ echo "stage again. Skipping this stage..."
+else
+ if [ $stage -le 15 ]; then
+ echo "$0: preparing for speed-perturbed data"
+ utils/data/perturb_data_dir_speed_3way.sh data/train_shorter data/train_shorter_sp_hires
+ echo "$0: creating high-resolution MFCC features for speed-perturbed data"
+ mfccdir=data/train_shorter_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/train_shorter_sp_hires || exit 1;
+
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_sp_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_sp_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_sp_hires || exit 1;
+ fi
+
+ if [ $stage -le 16 ]; then
+ for name in $aug_affix; do
+ echo "$0: creating high-resolution MFCC features for train_shorter_${name}"
+ mfccdir=data/train_shorter_${name}_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ utils/copy_data_dir.sh data/train_shorter_${name} data/train_shorter_${name}_hires
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_${name}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_${name}_hires || exit 1;
+ done
+ eval utils/combine_data.sh data/${combined_train_set}_hires data/train_shorter_sp_hires \
+ data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}_hires
+ fi
+fi
+
+if [ $stage -le 17 ]; then
+ echo "$0: Aligning the training data using the e2e chain model..."
+ steps/nnet3/align.sh --nj 50 --cmd "$train_cmd" \
+ --use-gpu false \
+ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \
+ data/${combined_train_set}_hires data/lang exp/chain/e2e_tdnn_1a exp/chain/e2e_ali_${combined_train_set}
+fi
+
+if [ $stage -le 18 ]; then
+ echo "$0: Building a tree and training a regular chain model using the e2e alignments..."
+ local/chain/run_tdnn_e2eali.sh --train-set ${combined_train_set} --e2echain-model-dir exp/chain/e2e_tdnn_1a
+fi
+
+exit 0
diff --git a/egs/mobvoi/v1/steps b/egs/mobvoi/v1/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/mobvoi/v1/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/mobvoi/v1/utils b/egs/mobvoi/v1/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/mobvoi/v1/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/mobvoihotwords/README.txt b/egs/mobvoihotwords/README.txt
new file mode 100644
index 00000000000..4c43c3f8eb5
--- /dev/null
+++ b/egs/mobvoihotwords/README.txt
@@ -0,0 +1,17 @@
+
+ The MobvoiHotwords dataset is a ~144-hour corpus of wake word data which is
+ publicly available on https://www.openslr.org/87
+
+ For wake word data, wake word utterances containing either 'Hi xiaowen' or 'Nihao
+ Wenwen' are collected. For each wake word, there are about 36k utterances. All
+ wake word data is collected from 788 subjects, ages 3-65, with different
+ distances from the smart speaker (1, 3 and 5 meters). Different noises
+ (typical home environment noises like music and TV) with varying sound
+ pressure levels are played in the background during the collection.
+
+ The recipe is in v1/
+
+ The E2E LF-MMI recipe does not require any prior alignments for training
+ LF-MMI, making the alignment more flexible during training. It can be optionally
+ followed by a regular LF-MMI training to further improve the performance.
+
diff --git a/egs/mobvoihotwords/v1/cmd.sh b/egs/mobvoihotwords/v1/cmd.sh
new file mode 100644
index 00000000000..fc5d4aa9e1c
--- /dev/null
+++ b/egs/mobvoihotwords/v1/cmd.sh
@@ -0,0 +1,24 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl"
+export decode_cmd="queue.pl --mem 4G"
+# the use of cuda_cmd is deprecated, used only in 'nnet1',
+export cuda_cmd="queue.pl --gpu 1"
+
+if [[ "$(hostname -f)" == *.fit.vutbr.cz ]]; then
+ queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+ export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+ export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+ export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
+
diff --git a/egs/mobvoihotwords/v1/conf/mfcc.conf b/egs/mobvoihotwords/v1/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/mobvoihotwords/v1/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/mobvoihotwords/v1/conf/mfcc_hires.conf b/egs/mobvoihotwords/v1/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..d96b86ddfcb
--- /dev/null
+++ b/egs/mobvoihotwords/v1/conf/mfcc_hires.conf
@@ -0,0 +1,9 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequently, relative to Nyquist of 8000 (=7600)
diff --git a/egs/mobvoihotwords/v1/conf/online_cmvn.conf b/egs/mobvoihotwords/v1/conf/online_cmvn.conf
new file mode 100644
index 00000000000..a173510e433
--- /dev/null
+++ b/egs/mobvoihotwords/v1/conf/online_cmvn.conf
@@ -0,0 +1,3 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
+--norm-means=true
+--norm-vars=false
diff --git a/egs/mobvoihotwords/v1/local/add_prefix_to_scp.py b/egs/mobvoihotwords/v1/local/add_prefix_to_scp.py
new file mode 120000
index 00000000000..b6750c78e16
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/add_prefix_to_scp.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/add_prefix_to_scp.py
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/build_tree.sh b/egs/mobvoihotwords/v1/local/chain/build_tree.sh
new file mode 120000
index 00000000000..fb4d74cc9ae
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/build_tree.sh
@@ -0,0 +1 @@
+../../../../mobvoi/v1/local/chain/build_tree.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/run_e2e_tdnn.sh b/egs/mobvoihotwords/v1/local/chain/run_e2e_tdnn.sh
new file mode 120000
index 00000000000..891eec02423
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/run_e2e_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_e2e_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/run_tdnn.sh b/egs/mobvoihotwords/v1/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/run_tdnn_e2eali.sh b/egs/mobvoihotwords/v1/local/chain/run_tdnn_e2eali.sh
new file mode 120000
index 00000000000..38f0bd07e6c
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/run_tdnn_e2eali.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_e2eali_1a.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/chain/tuning/run_e2e_tdnn_1a.sh b/egs/mobvoihotwords/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
new file mode 100755
index 00000000000..a0df6b0ce14
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
@@ -0,0 +1,258 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+set -e
+
+# configs for 'chain'
+stage=0
+train_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+minibatch_size=150=128,64/300=100,64,32/600=50,32,16/1200=16,8
+common_egs_dir=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_combined_spe2e
+test_sets="dev eval"
+wake_word0="HiXiaowen"
+wake_word1="NihaoWenwen"
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ mkdir -p $tree_dir
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word0=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/phones.txt | grep "nihaowenwen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word0 $id_word0
+2 3 $id_sil $id_sil
+1 4 $id_word1 $id_word1
+4 5 $id_sil $id_sil
+1 6 $id_freetext $id_freetext
+6 7 $id_sil $id_sil
+3 2.3
+5 2.3
+7 0.0
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+ steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$train_cmd" \
+ data/${train_set}_hires $lang $tree_dir
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+
+ relu-batchnorm-dropout-layer name=tdnn1 input=Append(-2,-1,0,1,2) $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 3 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \
+ --cmd "$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.frame-subsampling-factor=3 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00003 \
+ --trainer.optimization.final-effective-lrate 0.000003 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch $minibatch_size \
+ --trainer.optimization.momentum=0.0 \
+ --egs.dir "$common_egs_dir" \
+ --egs.opts "--num-utts-subset 300 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --feat-dir data/${train_set}_hires \
+ --tree-dir $tree_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word in $wake_word0 $wake_word1; do
+ if [[ "$wake_word" == "$wake_word0" ]]; then
+ wake_word0_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ wake_word1_cost_range="0.0"
+ else
+ wake_word0_cost_range="0.0"
+ wake_word1_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ fi
+ for wake_word0_cost in $wake_word0_cost_range; do
+ for wake_word1_cost in $wake_word1_cost_range; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id0=`cat $lang_decode/words.txt | grep $wake_word0 | awk '{print $2}'`
+ id1=`cat $lang_decode/words.txt | grep $wake_word1 | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.0
+4 0 $sil_id $sil_id
+1 2 $id0 $id0 $wake_word0_cost
+1 3 $id1 $id1 $wake_word1_cost
+2 0 $sil_id $sil_id
+3 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 10 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_${wake_word}_cost${wake_word0_cost}_${wake_word1_cost} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ done
+ done
+ for data in $test_sets; do
+ for wake_word in $wake_word0 $wake_word1; do
+ echo "Results on $data set with wake word ${wake_word}:"
+ cat ${dir}_online/decode_${data}_${wake_word}_cost*/scoring_kaldi/all_results
+ done
+ done
+fi
diff --git a/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_1a.sh b/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..edb1a8524db
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,312 @@
+#!/bin/bash
+#
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+# Apache 2.0
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+nj=30
+gmm=mono
+train_stage=-5 # starting from -5 to skip phone-lm estimation
+get_egs_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+chunk_width=140,100,160
+common_egs_dir=
+reporting_email=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter
+combined_train_set=train_shorter_sp_combined
+test_sets="dev eval"
+aug_prefix="rev1 noise music babble"
+wake_word0="HiXiaowen"
+wake_word1="NihaoWenwen"
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 2 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom)
+ # use the same num-jobs as the alignments
+ steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \
+ data/lang $gmm_dir $lat_dir
+ rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 3 ]; then
+ local/copy_lat_dir.sh --nj 75 --cmd "$train_cmd" --utt-prefixes "$aug_prefix" \
+ $combined_train_data_dir $lat_dir $combined_lat_dir
+fi
+
+if [ $stage -le 4 ]; then
+ # Build a tree using our new topology. We know we have alignments from
+ # steps/align_fmllr.sh, so use those.
+ # The num-leaves is always somewhat less than the num-leaves from the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ local/chain/build_tree.sh \
+ --frame-subsampling-factor 3 \
+ --cmd "$train_cmd" ${lores_train_data_dir} \
+ $lang $ali_dir $tree_dir
+
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word0=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/phones.txt | grep "nihaowenwen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word0 $id_word0
+2 3 $id_sil $id_sil
+1 4 $id_word1 $id_word1
+4 5 $id_sil $id_sil
+1 6 $id_freetext $id_freetext
+6 7 $id_sil $id_sil
+3 2.3
+5 2.3
+7 0.0
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 6 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ cp $tree_dir/phone_lm.fst $dir/phone_lm.fst
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00005 \
+ --trainer.optimization.final-effective-lrate 0.000005 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.0 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=0 \
+ --egs.chunk-right-context=0 \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir $combined_train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$combined_lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word in $wake_word0 $wake_word1; do
+ if [[ "$wake_word" == "$wake_word0" ]]; then
+ wake_word0_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ wake_word1_cost_range="0.0"
+ else
+ wake_word0_cost_range="0.0"
+ wake_word1_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ fi
+ for wake_word0_cost in $wake_word0_cost_range; do
+ for wake_word1_cost in $wake_word1_cost_range; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id0=`cat $lang_decode/words.txt | grep $wake_word0 | awk '{print $2}'`
+ id1=`cat $lang_decode/words.txt | grep $wake_word1 | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.0
+4 0 $sil_id $sil_id
+1 2 $id0 $id0 $wake_word0_cost
+1 3 $id1 $id1 $wake_word1_cost
+2 0 $sil_id $sil_id
+3 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 10 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_${wake_word}_cost${wake_word0_cost}_${wake_word1_cost} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ done
+ done
+ for data in $test_sets; do
+ for wake_word in $wake_word0 $wake_word1; do
+ echo "Results on $data set with wake word ${wake_word}:"
+ cat ${dir}_online/decode_${data}_${wake_word}_cost*/scoring_kaldi/all_results
+ done
+ done
+fi
diff --git a/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh b/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
new file mode 100755
index 00000000000..12c8448f65e
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/chain/tuning/run_tdnn_e2eali_1a.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+#
+# Copyright 2019-2020 Daniel Povey
+# 2019-2020 Yiming Wang
+# Apache 2.0
+
+
+set -e
+
+# configs for 'chain'
+stage=0
+nj=30
+e2echain_model_dir=exp/chain/e2e_tdnn_1a
+train_stage=-5 # starting from -5 to skip phone-lm estimation
+get_egs_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=6
+num_jobs_initial=2
+num_jobs_final=5
+chunk_width=140,100,160
+common_egs_dir=
+reporting_email=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_sp_combined
+test_sets="dev eval"
+wake_word0="HiXiaowen"
+wake_word1="NihaoWenwen"
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 2 ]; then
+ # Get the alignments as lattices (gives the chain training more freedom)
+ # use the same num-jobs as the alignments
+ steps/nnet3/align_lats.sh --nj 75 --cmd "$train_cmd" \
+ --acoustic-scale 1.0 \
+ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \
+ $train_data_dir data/lang $e2echain_model_dir $lat_dir
+ echo "" >$lat_dir/splice_opts
+fi
+
+if [ $stage -le 3 ]; then
+ # Build a tree using our new topology. We know we have alignments from
+ # steps/align_fmllr.sh, so use those.
+ # The num-leaves is always somewhat less than the num-leaves from the GMM baseline.
+ if [ -f $tree_dir/final.mdl ]; then
+ echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+ exit 1;
+ fi
+ local/chain/build_tree.sh \
+ --frame-subsampling-factor 3 --cmd "$train_cmd" \
+ $train_data_dir $lang $ali_dir $tree_dir
+
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word0=`cat data/lang/phones.txt | grep "hixiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/phones.txt | grep "nihaowenwen" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat <<EOF > $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word0 $id_word0
+2 3 $id_sil $id_sil
+1 4 $id_word1 $id_word1
+4 5 $id_sil $id_sil
+1 6 $id_freetext $id_freetext
+6 7 $id_sil $id_sil
+3 2.3
+5 2.3
+7 0.0
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+fi
+
+if [ $stage -le 4 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=40 name=input
+
+ # please note that it is important to have input layer with the name=input
+ # as the layer immediately preceding the fixed-affine-layer to enable
+ # the use of short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1) affine-transform-file=$dir/configs/lda.mat
+
+ relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=$dim
+ tdnnf-layer name=tdnnf2 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf3 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf4 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf5 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf6 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf7 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf8 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=1
+ tdnnf-layer name=tdnnf9 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=0
+ tdnnf-layer name=tdnnf10 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf11 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf12 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf13 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf14 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf15 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf16 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf17 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf18 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf19 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ tdnnf-layer name=tdnnf20 $tdnnf_opts dim=$dim bottleneck-dim=$bn_dim time-stride=3
+ linear-component name=prefinal-l dim=30 $linear_opts
+
+ prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+ prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=$dim small-dim=30
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
+fi
+
+if [ $stage -le 6 ]; then
+ # no need to store the egs in a shared storage because we always
+ # remove them. Anyway, it takes only 5 minutes to generate them.
+
+ cp $tree_dir/phone_lm.fst $dir/phone_lm.fst
+
+ steps/nnet3/chain/train.py --stage=$train_stage \
+ --cmd="$decode_cmd" \
+ --feat.cmvn-opts="--config=conf/online_cmvn.conf" \
+ --chain.xent-regularize $xent_regularize \
+ --chain.leaky-hmm-coefficient=0.1 \
+ --chain.l2-regularize=0.0 \
+ --chain.apply-deriv-weights=false \
+ --chain.alignment-subsampling-factor=1 \
+ --trainer.add-option="--optimization.memory-compression-level=2" \
+ --trainer.srand=$srand \
+ --trainer.max-param-change=2.0 \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.frames-per-iter $frames_per_iter \
+ --trainer.optimization.num-jobs-initial $num_jobs_initial \
+ --trainer.optimization.num-jobs-final $num_jobs_final \
+ --trainer.optimization.initial-effective-lrate 0.00005 \
+ --trainer.optimization.final-effective-lrate 0.000005 \
+ --trainer.optimization.backstitch-training-scale $bs_scale \
+ --trainer.num-chunk-per-minibatch=128,64 \
+ --trainer.optimization.momentum=0.0 \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=0 \
+ --egs.chunk-right-context=0 \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --egs.opts="--frames-overlap-per-eg 0 --online-cmvn $online_cmvn" \
+ --cleanup.remove-egs=$remove_egs \
+ --use-gpu=true \
+ --reporting.email="$reporting_email" \
+ --feat-dir $train_data_dir \
+ --tree-dir $tree_dir \
+ --lat-dir=$lat_dir \
+ --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 7 ]; then
+ steps/online/nnet3/prepare_online_decoding.sh \
+ --mfcc-config conf/mfcc_hires.conf \
+ --online-cmvn-config conf/online_cmvn.conf \
+ $lang ${dir} ${dir}_online
+
+ rm $dir/.error 2>/dev/null || true
+ for wake_word in $wake_word0 $wake_word1; do
+ if [[ "$wake_word" == "$wake_word0" ]]; then
+ wake_word0_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ wake_word1_cost_range="0.0"
+ else
+ wake_word0_cost_range="0.0"
+ wake_word1_cost_range="0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0"
+ fi
+ for wake_word0_cost in $wake_word0_cost_range; do
+ for wake_word1_cost in $wake_word1_cost_range; do
+ rm -rf $lang_decode
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.0 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" $lang_decode/temp $lang_decode
+
+ sil_id=`cat $lang_decode/words.txt | grep "<sil>" | awk '{print $2}'`
+ freetext_id=`cat $lang_decode/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id0=`cat $lang_decode/words.txt | grep $wake_word0 | awk '{print $2}'`
+ id1=`cat $lang_decode/words.txt | grep $wake_word1 | awk '{print $2}'`
+ mkdir -p $lang_decode/lm
+ cat <<EOF > $lang_decode/lm/fst.txt
+0 1 $sil_id $sil_id
+0 4 $sil_id $sil_id 7.0
+1 4 $freetext_id $freetext_id 0.0
+4 0 $sil_id $sil_id
+1 2 $id0 $id0 $wake_word0_cost
+1 3 $id1 $id1 $wake_word1_cost
+2 0 $sil_id $sil_id
+3 0 $sil_id $sil_id
+0
+EOF
+ fstcompile $lang_decode/lm/fst.txt $lang_decode/G.fst
+ set +e
+ fstisstochastic $lang_decode/G.fst
+ set -e
+ utils/validate_lang.pl $lang_decode
+ cp $lang/topo $lang_decode/topo
+
+ utils/lang/check_phones_compatible.sh \
+ data/lang/phones.txt $lang_decode/phones.txt
+ rm -rf $tree_dir/graph_online/HCLG.fst
+ utils/mkgraph.sh \
+ --self-loop-scale 1.0 $lang_decode \
+ $dir $tree_dir/graph_online || exit 1;
+
+ frames_per_chunk=150
+ for data in $test_sets; do
+ (
+ nj=30
+ steps/online/nnet3/decode_wake_word.sh \
+ --beam 10 --acwt 1.0 \
+ --wake-word $wake_word \
+ --extra-left-context-initial 0 \
+ --frames-per-chunk $frames_per_chunk \
+ --nj $nj --cmd "$decode_cmd" \
+ $tree_dir/graph_online data/${data}_hires ${dir}_online/decode_${data}_${wake_word}_cost${wake_word0_cost}_${wake_word1_cost} || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+ done
+ done
+ done
+ for data in $test_sets; do
+ for wake_word in $wake_word0 $wake_word1; do
+ echo "Results on $data set with wake word ${wake_word}:"
+ cat ${dir}_online/decode_${data}_${wake_word}_cost*/scoring_kaldi/all_results
+ done
+ done
+fi
diff --git a/egs/mobvoihotwords/v1/local/compute_metrics.py b/egs/mobvoihotwords/v1/local/compute_metrics.py
new file mode 120000
index 00000000000..695a2ca5f6d
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/compute_metrics.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/compute_metrics.py
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/copy_lat_dir.sh b/egs/mobvoihotwords/v1/local/copy_lat_dir.sh
new file mode 120000
index 00000000000..6be684730ad
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/copy_lat_dir.sh
@@ -0,0 +1 @@
+../../../../scripts/wakeword/copy_lat_dir.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/gen_topo.pl b/egs/mobvoihotwords/v1/local/gen_topo.pl
new file mode 120000
index 00000000000..fd5959cebaf
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/gen_topo.pl
@@ -0,0 +1 @@
+../../../../scripts/wakeword/gen_topo.pl
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/get_random_subsegments.py b/egs/mobvoihotwords/v1/local/get_random_subsegments.py
new file mode 120000
index 00000000000..24631471ff6
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/get_random_subsegments.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/get_random_subsegments.py
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/mobvoi_data_download.sh b/egs/mobvoihotwords/v1/local/mobvoi_data_download.sh
new file mode 100755
index 00000000000..9857c97ee80
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/mobvoi_data_download.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+[ -f ./path.sh ] && . ./path.sh
+
+dl_dir=data/download
+
+mkdir -p $dl_dir
+
+dataset=mobvoi_hotword_dataset.tgz
+resources=mobvoi_hotword_dataset_resources.tgz
+
+# base url for downloads.
+data_url=http://www.openslr.org/resources/87
+
+if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
+ src_path=/export/fs04/a07/ywang/mobvoihotwords
+else
+ src_path=$dl_dir
+fi
+
+if [ ! -f $src_path/$dataset ] || [ ! -f $src_path/$resources ]; then
+ if ! which wget >/dev/null; then
+ echo "$0: wget is not installed."
+ exit 1;
+ fi
+
+ if [ ! -f $src_path/$dataset ]; then
+ echo "$0: downloading data from $data_url/$dataset. This may take some time, please be patient."
+ if ! wget --no-check-certificate -O $dl_dir/$dataset $data_url/$dataset; then
+ echo "$0: error executing wget $data_url/$dataset"
+ exit 1;
+ fi
+ fi
+
+ if [ ! -f $src_path/$resources ]; then
+ if ! wget --no-check-certificate -O $dl_dir/$resources $data_url/$resources; then
+ echo "$0: error executing wget $data_url/$resources"
+ exit 1;
+ fi
+ fi
+fi
+
+if [ -d $dl_dir/$(basename "$dataset" .tgz) ]; then
+ echo "Not extracting $(basename "$dataset" .tgz) as it is already there."
+else
+ echo "Extracting $dataset..."
+ tar -xvzf $src_path/$dataset -C $dl_dir || exit 1;
+ echo "Done extracting $dataset."
+fi
+
+if [ -d $dl_dir/$(basename "$resources" .tgz) ]; then
+ echo "Not extracting $(basename "$resources" .tgz) as it is already there."
+else
+ echo "Extracting $resources..."
+ tar -xvzf $src_path/$resources -C $dl_dir || exit 1;
+ echo "Done extracting $resources."
+fi
+
+exit 0
diff --git a/egs/mobvoihotwords/v1/local/prepare_data.py b/egs/mobvoihotwords/v1/local/prepare_data.py
new file mode 100755
index 00000000000..3e11d313491
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/prepare_data.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+
+# Copyright 2018-2020 Yiming Wang
+# 2018-2020 Daniel Povey
+# Apache 2.0
+
+""" This script prepares the Mobvoi data into kaldi format.
+"""
+
+
+import argparse
+import os
+import sys
+import json
+
+def main():
+ parser = argparse.ArgumentParser(description="""Prepare data.""")
+ parser.add_argument("wav_dir", type=str,
+ help="dir containing all the wav files")
+ parser.add_argument("path", type=str,
+ help="path to the json file")
+ parser.add_argument("out_dir", type=str,
+ help="out dir")
+ parser.add_argument("--non-wake-word", type=str, default="FREETEXT",
+ help="non-wake word transcript")
+ args = parser.parse_args()
+
+ assert args.non_wake_word != "HiXiaowen" and args.non_wake_word != "NihaoWenwen"
+ with open(args.path, "r", encoding="utf-8") as f:
+ data = json.load(f)
+ utt_id, spk_id, wav_file, label = [], [], [], []
+ for entry in data:
+ utt_id.append(entry["utt_id"])
+ spk_id.append(entry["speaker_id"])
+ label.append(entry["keyword_id"])
+
+ abs_dir = os.path.abspath(args.wav_dir)
+ with open(os.path.join(args.out_dir, "wav.scp"), "w", encoding="utf-8") as f_wav, \
+ open(os.path.join(args.out_dir, "text"), "w", encoding="utf-8") as f_text, \
+ open(os.path.join(args.out_dir, 'utt2spk'), 'w', encoding="utf-8") as f_utt2spk:
+ for utt, spk, l in zip(utt_id, spk_id, label):
+ if spk is None:
+ spk = utt # deal with None speaker
+ f_wav.write(spk + "-" + utt + " " + os.path.join(abs_dir, utt + ".wav") + "\n")
+ if l == 0:
+ text = "HiXiaowen"
+ elif l == 1:
+ text = "NihaoWenwen"
+ else:
+ assert l == -1
+ text = args.non_wake_word
+ f_text.write(spk + "-" + utt + " " + text + "\n")
+ f_utt2spk.write(spk + "-" + utt + " " + spk + "\n")
+
+if __name__ == "__main__":
+ main()
diff --git a/egs/mobvoihotwords/v1/local/prepare_dict.sh b/egs/mobvoihotwords/v1/local/prepare_dict.sh
new file mode 100755
index 00000000000..afe17d57d3a
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/prepare_dict.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+
+set -e
+dir=data/local/dict
+
+. ./utils/parse_options.sh
+
+mkdir -p $dir
+
+# First get the set of all letters that occur in data/train/text
+echo "hixiaowen" > $dir/nonsilence_phones.txt
+echo "nihaowenwen" >> $dir/nonsilence_phones.txt
+echo "freetext" >> $dir/nonsilence_phones.txt
+
+echo "HiXiaowen hixiaowen" > $dir/lexicon.txt
+echo "NihaoWenwen nihaowenwen" >> $dir/lexicon.txt
+echo "FREETEXT freetext" >> $dir/lexicon.txt
+echo "<sil> SIL" >> $dir/lexicon.txt
+
+echo SIL > $dir/silence_phones.txt
+
+echo SIL >$dir/optional_silence.txt
+
+echo -n "" >$dir/extra_questions.txt
diff --git a/egs/mobvoihotwords/v1/local/score_online.sh b/egs/mobvoihotwords/v1/local/score_online.sh
new file mode 120000
index 00000000000..c2b12f23b08
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/score_online.sh
@@ -0,0 +1 @@
+../../../mobvoi/v1/local/score_online.sh
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/local/wer_output_filter b/egs/mobvoihotwords/v1/local/wer_output_filter
new file mode 100755
index 00000000000..bb4de1d1572
--- /dev/null
+++ b/egs/mobvoihotwords/v1/local/wer_output_filter
@@ -0,0 +1,24 @@
+#!/usr/bin/env perl
+# Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+use utf8;
+
+use open qw(:encoding(utf8));
+binmode STDIN, ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+while (<>) {
+ @F = split " ";
+ print $F[0] . " ";
+ foreach $s (@F[1..$#F]) {
+ if ($s =~ /\<.*\>/) {
+ print "";
+ } else {
+ print "$s "
+ }
+ }
+ print "\n";
+}
+
+
diff --git a/egs/mobvoihotwords/v1/path.sh b/egs/mobvoihotwords/v1/path.sh
new file mode 100755
index 00000000000..2d17b17a84a
--- /dev/null
+++ b/egs/mobvoihotwords/v1/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/mobvoihotwords/v1/run.sh b/egs/mobvoihotwords/v1/run.sh
new file mode 100755
index 00000000000..6a5adc14527
--- /dev/null
+++ b/egs/mobvoihotwords/v1/run.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+stage=0
+
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -euo pipefail
+
+if [ $stage -le 0 ]; then
+ local/mobvoi_data_download.sh
+ echo "$0: Extracted all datasets into data/download/"
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Preparing datasets..."
+ wav_dir=data/download/mobvoi_hotword_dataset
+ for folder in train dev eval; do
+ mkdir -p data/$folder
+ for prefix in p n; do
+ mkdir -p data/${prefix}_$folder
+ json_path=data/download/mobvoi_hotword_dataset_resources/${prefix}_$folder.json
+ if [ $folder = "eval" ]; then
+ json_path=data/download/mobvoi_hotword_dataset_resources/${prefix}_test.json
+ fi
+ local/prepare_data.py $wav_dir $json_path data/${prefix}_$folder --non-wake-word "FREETEXT"
+ done
+ cat data/p_$folder/wav.scp data/n_$folder/wav.scp > data/$folder/wav.scp
+ cat data/p_$folder/text data/n_$folder/text > data/$folder/text
+ cat data/p_$folder/utt2spk data/n_$folder/utt2spk > data/$folder/utt2spk
+ rm -rf data/p_$folder data/n_$folder
+ done
+ echo "$0: text, utt2spk and wav.scp have been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: Extracting MFCC..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ utils/fix_data_dir.sh $dir
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 16 $dir
+ steps/compute_cmvn_stats.sh $dir
+ utils/fix_data_dir.sh $dir
+ utils/data/get_utt2dur.sh $dir
+ utils/validate_data_dir.sh $dir
+ done
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Preparing dictionary and lang..."
+ local/prepare_dict.sh
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.5 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" data/lang/temp data/lang
+fi
+
+if [ $stage -le 4 ]; then
+ id_sil=`cat data/lang/words.txt | grep "<sil>" | awk '{print $2}'`
+ id_freetext=`cat data/lang/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id_word0=`cat data/lang/words.txt | grep "HiXiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/words.txt | grep "NihaoWenwen" | awk '{print $2}'`
+ mkdir -p data/lang/lm
+ cat <<EOF > data/lang/lm/fst.txt
+0 1 $id_sil $id_sil
+0 4 $id_sil $id_sil 7.0
+1 4 $id_freetext $id_freetext 0.0
+4 0 $id_sil $id_sil
+1 2 $id_word0 $id_word0 2.3
+2 0 $id_sil $id_sil
+1 3 $id_word1 $id_word1 2.3
+3 0 $id_sil $id_sil
+0
+EOF
+ fstcompile data/lang/lm/fst.txt data/lang/G.fst
+ set +e
+ fstisstochastic data/lang/G.fst
+ set -e
+ utils/validate_lang.pl data/lang
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: subsegmenting for the training data..."
+ srcdir=data/train
+ utils/data/convert_data_dir_to_whole.sh $srcdir ${srcdir}_whole
+
+ utils/data/get_segments_for_data.sh $srcdir > ${srcdir}_whole/segments
+ utils/filter_scp.pl <(awk '{if ($2 == "FREETEXT") print $1}' ${srcdir}_whole/text) \
+ ${srcdir}_whole/segments >${srcdir}_whole/neg_segments
+ utils/filter_scp.pl --exclude ${srcdir}_whole/neg_segments ${srcdir}_whole/segments \
+ >${srcdir}_whole/pos_segments
+ utils/filter_scp.pl ${srcdir}_whole/pos_segments ${srcdir}_whole/utt2dur >${srcdir}_whole/pos_utt2dur
+ local/get_random_subsegments.py --overlap-duration=0.3 --max-remaining-duration=0.3 \
+ ${srcdir}_whole/neg_segments ${srcdir}_whole/pos_utt2dur | \
+ cat ${srcdir}_whole/pos_segments - | sort >${srcdir}_whole/sub_segments
+ utils/data/subsegment_data_dir.sh ${srcdir}_whole \
+ ${srcdir}_whole/sub_segments data/train_segmented
+ awk '{print $1,$2}' ${srcdir}_whole/sub_segments | \
+ utils/apply_map.pl -f 2 ${srcdir}_whole/text >data/train_segmented/text
+ utils/data/extract_wav_segments_data_dir.sh --nj 50 --cmd "$train_cmd" \
+ data/train_segmented data/train_shorter
+ steps/compute_cmvn_stats.sh data/train_shorter
+ utils/fix_data_dir.sh data/train_shorter
+ utils/validate_data_dir.sh data/train_shorter
+fi
+
+# In this section, we augment the training data with reverberation,
+# noise, music, and babble, and combined it with the clean data.
+if [ $stage -le 6 ]; then
+ utils/data/get_utt2dur.sh data/train_shorter
+ cp data/train_shorter/utt2dur data/train_shorter/reco2dur
+ # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+ [ ! -f rirs_noises.zip ] && wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+ [ ! -d "RIRS_NOISES" ] && unzip rirs_noises.zip
+
+ # Make a version with reverberated speech
+ rvb_opts=()
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+
+ # Make a reverberated version of the SWBD+SRE list. Note that we don't add any
+ # additive noise here.
+ steps/data/reverberate_data_dir.py \
+ "${rvb_opts[@]}" \
+ --speech-rvb-probability 1 \
+ --prefix "rev" \
+ --pointsource-noise-addition-probability 0 \
+ --isotropic-noise-addition-probability 0 \
+ --num-replications 1 \
+ --source-sampling-rate 16000 \
+ data/train_shorter data/train_shorter_reverb
+ cat data/train_shorter/utt2dur | awk -v name=rev1 '{print name"-"$0}' >data/train_shorter_reverb/utt2dur
+
+ # Prepare the MUSAN corpus, which consists of music, speech, and noise
+ # suitable for augmentation.
+ steps/data/make_musan.sh /export/corpora/JHU/musan data
+
+ # Get the duration of the MUSAN recordings. This will be used by the
+ # script augment_data_dir.py.
+ for name in speech noise music; do
+ utils/data/get_utt2dur.sh data/musan_${name}
+ cp data/musan_${name}/utt2dur data/musan_${name}/reco2dur
+ done
+
+ # Augment with musan_noise
+ export LC_ALL=en_US.UTF-8
+ steps/data/augment_data_dir.py --utt-prefix "noise" --modify-spk-id true --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_shorter data/train_shorter_noise
+ # Augment with musan_music
+ steps/data/augment_data_dir.py --utt-prefix "music" --modify-spk-id true --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_shorter data/train_shorter_music
+ # Augment with musan_speech
+ steps/data/augment_data_dir.py --utt-prefix "babble" --modify-spk-id true --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_shorter data/train_shorter_babble
+ export LC_ALL=C
+fi
+
+if [ $stage -le 7 ]; then
+ # Now make MFCC features
+ for name in reverb noise music babble; do
+ steps/make_mfcc.sh --nj 16 --cmd "$train_cmd" \
+ data/train_shorter_${name} || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}
+ utils/fix_data_dir.sh data/train_shorter_${name}
+ utils/validate_data_dir.sh data/train_shorter_${name}
+ done
+fi
+
+# monophone training
+if [ $stage -le 8 ]; then
+ steps/train_mono.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter data/lang exp/mono
+ (
+ utils/mkgraph.sh data/lang \
+ exp/mono exp/mono/graph
+ )&
+
+ steps/align_si.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter data/lang exp/mono exp/mono_ali_train_shorter
+fi
+
+if [ $stage -le 9 ]; then
+ echo "$0: preparing for low-resolution speed-perturbed data (for alignment)"
+ utils/data/perturb_data_dir_speed_3way.sh data/train_shorter data/train_shorter_sp
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 30 data/train_shorter_sp || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_sp || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_sp
+fi
+
+if [ $stage -le 10 ]; then
+ echo "$0: aligning with the perturbed low-resolution data"
+ steps/align_fmllr.sh --nj 50 --cmd "$train_cmd" \
+ data/train_shorter_sp data/lang exp/mono exp/mono_ali_train_shorter_sp || exit 1
+fi
+
+if [ $stage -le 11 ]; then
+ echo "$0: creating high-resolution MFCC features"
+ mfccdir=data/train_shorter_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+
+ for datadir in train_shorter_sp dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ done
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/train_shorter_sp_hires || exit 1;
+
+ for datadir in train_shorter_sp dev eval; do
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+fi
+
+combined_train_set=train_shorter_sp_combined
+aug_affix="reverb noise music babble"
+if [ $stage -le 12 ]; then
+ for name in $aug_affix; do
+ echo "$0: creating high-resolution MFCC features for train_shorter_${name}"
+ mfccdir=data/train_shorter_${name}_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/mobvoi-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ utils/copy_data_dir.sh data/train_shorter_${name} data/train_shorter_${name}_hires
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_${name}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_${name}_hires || exit 1;
+ done
+ eval utils/combine_data.sh data/${combined_train_set}_hires data/train_shorter_sp_hires \
+ data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}_hires
+fi
+
+
+if [ $stage -le 13 ]; then
+ local/chain/run_tdnn.sh --train-set train_shorter --combined-train-set ${combined_train_set}
+fi
+
+exit 0
+
diff --git a/egs/mobvoihotwords/v1/run_e2e.sh b/egs/mobvoihotwords/v1/run_e2e.sh
new file mode 100755
index 00000000000..540adc0cfb9
--- /dev/null
+++ b/egs/mobvoihotwords/v1/run_e2e.sh
@@ -0,0 +1,299 @@
+#!/bin/bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+# The dataset is available at https://www.openslr.org/87/ and is downloaded by local/mobvoi_data_download.sh.
+
+# This recipe uses E2E LF-MMI training which doesn't require GMM training to obtain alignments.
+# Its performance is slightly better than those based on alignments (cross-entropy or regular LF-MMI)
+# on this dataset.
+
+stage=0
+
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+set -euo pipefail
+
+if [ $stage -le 0 ]; then
+ local/mobvoi_data_download.sh
+ echo "$0: Extracted all datasets into data/download/"
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Preparing datasets..."
+ wav_dir=data/download/mobvoi_hotword_dataset
+ for folder in train dev eval; do
+ mkdir -p data/$folder
+ for prefix in p n; do
+ mkdir -p data/${prefix}_$folder
+ json_path=data/download/mobvoi_hotword_dataset_resources/${prefix}_$folder.json
+ if [ $folder = "eval" ]; then
+ json_path=data/download/mobvoi_hotword_dataset_resources/${prefix}_test.json
+ fi
+ local/prepare_data.py $wav_dir $json_path data/${prefix}_$folder --non-wake-word "FREETEXT"
+ done
+ cat data/p_$folder/wav.scp data/n_$folder/wav.scp > data/$folder/wav.scp
+ cat data/p_$folder/text data/n_$folder/text > data/$folder/text
+ cat data/p_$folder/utt2spk data/n_$folder/utt2spk > data/$folder/utt2spk
+ rm -rf data/p_$folder data/n_$folder
+ done
+ echo "$0: text, utt2spk and wav.scp have been generated in data/{train|dev|eval}."
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: Extracting MFCC..."
+ for folder in train dev eval; do
+ dir=data/$folder
+ utils/fix_data_dir.sh $dir
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 16 $dir
+ steps/compute_cmvn_stats.sh $dir
+ utils/fix_data_dir.sh $dir
+ utils/data/get_utt2dur.sh $dir
+ utils/validate_data_dir.sh $dir
+ done
+fi
+
+if [ $stage -le 3 ]; then
+ echo "$0: Preparing dictionary and lang..."
+ local/prepare_dict.sh
+ utils/prepare_lang.sh --num-sil-states 1 --num-nonsil-states 4 --sil-prob 0.5 \
+ --position-dependent-phones false \
+ data/local/dict "<sil>" data/lang/temp data/lang
+fi
+
+if [ $stage -le 4 ]; then
+ id_sil=`cat data/lang/words.txt | grep "<sil>" | awk '{print $2}'`
+ id_freetext=`cat data/lang/words.txt | grep "FREETEXT" | awk '{print $2}'`
+ id_word0=`cat data/lang/words.txt | grep "HiXiaowen" | awk '{print $2}'`
+ id_word1=`cat data/lang/words.txt | grep "NihaoWenwen" | awk '{print $2}'`
+ mkdir -p data/lang/lm
+ cat <<EOF > data/lang/lm/fst.txt
+0 1 $id_sil $id_sil
+0 4 $id_sil $id_sil 7.0
+1 4 $id_freetext $id_freetext 0.0
+4 0 $id_sil $id_sil
+1 2 $id_word0 $id_word0 2.3
+2 0 $id_sil $id_sil
+1 3 $id_word1 $id_word1 2.3
+3 0 $id_sil $id_sil
+0
+EOF
+ fstcompile data/lang/lm/fst.txt data/lang/G.fst
+ set +e
+ fstisstochastic data/lang/G.fst
+ set -e
+ utils/validate_lang.pl data/lang
+fi
+
+if [ $stage -le 5 ]; then
+ echo "$0: subsegmenting for the training data..."
+ srcdir=data/train
+ utils/data/convert_data_dir_to_whole.sh $srcdir ${srcdir}_whole
+
+ utils/data/get_segments_for_data.sh $srcdir > ${srcdir}_whole/segments
+ utils/filter_scp.pl <(awk '{if ($2 == "FREETEXT") print $1}' ${srcdir}_whole/text) \
+ ${srcdir}_whole/segments >${srcdir}_whole/neg_segments
+ utils/filter_scp.pl --exclude ${srcdir}_whole/neg_segments ${srcdir}_whole/segments \
+ >${srcdir}_whole/pos_segments
+ utils/filter_scp.pl ${srcdir}_whole/pos_segments ${srcdir}_whole/utt2dur >${srcdir}_whole/pos_utt2dur
+ local/get_random_subsegments.py --overlap-duration=0.3 --max-remaining-duration=0.3 \
+ ${srcdir}_whole/neg_segments ${srcdir}_whole/pos_utt2dur | \
+ cat ${srcdir}_whole/pos_segments - | sort >${srcdir}_whole/sub_segments
+ utils/data/subsegment_data_dir.sh ${srcdir}_whole \
+ ${srcdir}_whole/sub_segments data/train_segmented
+ awk '{print $1,$2}' ${srcdir}_whole/sub_segments | \
+ utils/apply_map.pl -f 2 ${srcdir}_whole/text >data/train_segmented/text
+ utils/data/extract_wav_segments_data_dir.sh --nj 50 --cmd "$train_cmd" \
+ data/train_segmented data/train_shorter
+ steps/compute_cmvn_stats.sh data/train_shorter
+ utils/fix_data_dir.sh data/train_shorter
+ utils/validate_data_dir.sh data/train_shorter
+fi
+
+# In this section, we augment the training data with reverberation,
+# noise, music, and babble, and combined it with the clean data.
+if [ $stage -le 6 ]; then
+ utils/data/get_utt2dur.sh data/train_shorter
+ cp data/train_shorter/utt2dur data/train_shorter/reco2dur
+ # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+ [ ! -f rirs_noises.zip ] && wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+ [ ! -d "RIRS_NOISES" ] && unzip rirs_noises.zip
+
+ # Make a version with reverberated speech
+ rvb_opts=()
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+
+ # Make a reverberated version of the SWBD+SRE list. Note that we don't add any
+ # additive noise here.
+ python3 steps/data/reverberate_data_dir.py \
+ "${rvb_opts[@]}" \
+ --speech-rvb-probability 1 \
+ --prefix "rev" \
+ --pointsource-noise-addition-probability 0 \
+ --isotropic-noise-addition-probability 0 \
+ --num-replications 1 \
+ --source-sampling-rate 16000 \
+ data/train_shorter data/train_shorter_reverb
+ cat data/train_shorter/utt2dur | awk -v name=rev1 '{print name"-"$0}' >data/train_shorter_reverb/utt2dur
+
+ # Prepare the MUSAN corpus, which consists of music, speech, and noise
+ # suitable for augmentation.
+ steps/data/make_musan.sh /export/corpora/JHU/musan data
+
+ # Get the duration of the MUSAN recordings. This will be used by the
+ # script augment_data_dir.py.
+ for name in speech noise music; do
+ utils/data/get_utt2dur.sh data/musan_${name}
+ cp data/musan_${name}/utt2dur data/musan_${name}/reco2dur
+ done
+
+ # Augment with musan_noise
+ steps/data/augment_data_dir.py --utt-prefix "noise" --modify-spk-id true --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_shorter data/train_shorter_noise
+ # Augment with musan_music
+ steps/data/augment_data_dir.py --utt-prefix "music" --modify-spk-id true --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_shorter data/train_shorter_music
+ # Augment with musan_speech
+ steps/data/augment_data_dir.py --utt-prefix "babble" --modify-spk-id true --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_shorter data/train_shorter_babble
+fi
+
+if [ $stage -le 7 ]; then
+ # Now make MFCC features
+ for name in reverb noise music babble; do
+ steps/make_mfcc.sh --nj 16 --cmd "$train_cmd" \
+ data/train_shorter_${name} || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}
+ utils/fix_data_dir.sh data/train_shorter_${name}
+ utils/validate_data_dir.sh data/train_shorter_${name}
+ done
+fi
+
+combined_train_set=train_shorter_combined
+aug_affix="reverb noise music babble"
+if [ $stage -le 8 ]; then
+ eval utils/combine_data.sh data/${combined_train_set} data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}
+fi
+
+if [ -f data/${combined_train_set}_spe2e_hires/feats.scp ]; then
+ echo "$0: It seems that features for the perturbed training data already exist."
+ echo "If you want to extract them anyway, remove them first and run this"
+ echo "stage again. Skipping this stage..."
+else
+ if [ $stage -le 9 ]; then
+ echo "$0: perturbing the training data to allowed lengths..."
+ utils/data/get_utt2dur.sh data/${combined_train_set} # necessary for the next command
+
+ # 12 in the following command means the allowed lengths are spaced
+ # by 12% change in length.
+ utils/data/perturb_speed_to_allowed_lengths.py --speed-perturb false 12 data/${combined_train_set} \
+ data/${combined_train_set}_e2e_hires
+ cat data/${combined_train_set}_e2e_hires/utt2dur | \
+ awk '{print $1 " " substr($1,5)}' >data/${combined_train_set}_e2e_hires/utt2uniq.tmp
+ utils/apply_map.pl -f 2 data/${combined_train_set}/utt2uniq \
+ data/${combined_train_set}_e2e_hires/utt2uniq
+ rm -f data/${combined_train_set}_e2e_hires/utt2uniq.tmp 2>/dev/null || true
+ utils/fix_data_dir.sh data/${combined_train_set}_e2e_hires
+
+ utils/data/get_utt2dur.sh data/train_shorter # necessary for the next command
+ utils/data/perturb_speed_to_allowed_lengths.py 12 data/train_shorter data/train_shorter_spe2e_hires
+ cat data/train_shorter_spe2e_hires/utt2dur | \
+ awk '{print $1 " " substr($1,5)}' >data/train_shorter_spe2e_hires/utt2uniq
+ utils/fix_data_dir.sh data/train_shorter_spe2e_hires
+ utils/combine_data.sh data/${combined_train_set}_spe2e_hires data/${combined_train_set}_e2e_hires data/train_shorter_spe2e_hires
+ cat data/train_shorter_spe2e_hires/allowed_lengths.txt >data/${combined_train_set}_spe2e_hires/allowed_lengths.txt
+ fi
+
+ if [ $stage -le 10 ]; then
+ echo "$0: extracting MFCC features for the training data..."
+ mfccdir=data/${combined_train_set}_spe2e_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/snips-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" \
+ data/${combined_train_set}_spe2e_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${combined_train_set}_spe2e_hires || exit 1;
+ utils/fix_data_dir.sh data/${combined_train_set}_spe2e_hires
+ utils/validate_data_dir.sh data/${combined_train_set}_spe2e_hires
+ fi
+fi
+
+if [ $stage -le 11 ]; then
+ if [ -f data/eval_hires/feats.scp ]; then
+ echo "$0: It seems that features for the test sets already exist."
+ echo "skipping this stage..."
+ else
+ echo "$0: extracting MFCC features for the test sets"
+ for datadir in dev eval; do
+ utils/copy_data_dir.sh data/$datadir data/${datadir}_hires
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/${datadir}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1;
+ utils/fix_data_dir.sh data/${datadir}_hires || exit 1;
+ done
+ fi
+fi
+
+if [ $stage -le 12 ]; then
+ local/chain/run_e2e_tdnn.sh --train-set ${combined_train_set}_spe2e
+fi
+
+combined_train_set=train_shorter_sp_combined
+if [ -f data/${combined_train_set}_hires/feats.scp ]; then
+ echo "$0: It seems that features for the perturbed training data already exist."
+ echo "If you want to extract them anyway, remove them first and run this"
+ echo "stage again. Skipping this stage..."
+else
+ if [ $stage -le 13 ]; then
+ echo "$0: preparing for speed-perturbed data"
+ utils/data/perturb_data_dir_speed_3way.sh data/train_shorter data/train_shorter_sp_hires
+ echo "$0: creating high-resolution MFCC features for speed-perturbed data"
+ mfccdir=data/train_shorter_sp_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/snips-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+
+ # do volume-perturbation on the training data prior to extracting hires
+ # features; this helps make trained nnets more invariant to test data volume.
+ utils/data/perturb_data_dir_volume.sh data/train_shorter_sp_hires || exit 1;
+
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_sp_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_sp_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_sp_hires || exit 1;
+ fi
+
+ if [ $stage -le 14 ]; then
+ for name in $aug_affix; do
+ echo "$0: creating high-resolution MFCC features for train_shorter_${name}"
+ mfccdir=data/train_shorter_${name}_hires/data
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+ utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/snips-$(date +'%m_%d_%H_%M')/v1/$mfccdir/storage $mfccdir/storage
+ fi
+ utils/copy_data_dir.sh data/train_shorter_${name} data/train_shorter_${name}_hires
+ steps/make_mfcc.sh --nj 50 --mfcc-config conf/mfcc_hires.conf \
+ --cmd "$train_cmd" data/train_shorter_${name}_hires || exit 1;
+ steps/compute_cmvn_stats.sh data/train_shorter_${name}_hires || exit 1;
+ utils/fix_data_dir.sh data/train_shorter_${name}_hires || exit 1;
+ done
+ eval utils/combine_data.sh data/${combined_train_set}_hires data/train_shorter_sp_hires \
+ data/train_shorter_{$(echo $aug_affix | sed 's/ /,/g')}_hires
+ fi
+fi
+
+if [ $stage -le 15 ]; then
+ echo "$0: Aligning the training data using the e2e chain model..."
+ steps/nnet3/align.sh --nj 50 --cmd "$train_cmd" \
+ --use-gpu false \
+ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \
+ data/${combined_train_set}_hires data/lang exp/chain/e2e_tdnn_1a exp/chain/e2e_ali_${combined_train_set}
+fi
+
+if [ $stage -le 16 ]; then
+ echo "$0: Building a tree and training a regular chain model using the e2e alignments..."
+ local/chain/run_tdnn_e2eali.sh --train-set ${combined_train_set} --e2echain-model-dir exp/chain/e2e_tdnn_1a
+fi
+
+exit 0
diff --git a/egs/mobvoihotwords/v1/steps b/egs/mobvoihotwords/v1/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/mobvoihotwords/v1/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/mobvoihotwords/v1/utils b/egs/mobvoihotwords/v1/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/mobvoihotwords/v1/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/multi_cn/s5/README.md b/egs/multi_cn/s5/README.md
index 4cfcb8d6941..fed933512b9 100644
--- a/egs/multi_cn/s5/README.md
+++ b/egs/multi_cn/s5/README.md
@@ -5,6 +5,7 @@ This is a Chinese speech recognition recipe that trains on all Chinese corpora o
* Primewords (99 hours)
* ST-CMDS (110 hours)
* THCHS-30 (26 hours)
+* optional AISHELL2 (~1000 hours) if available
This recipe was developed by Xingyu Na (Microsoft Corporation) and Hui Bu (AISHELL Foundation).
diff --git a/egs/multi_cn/s5/RESULTS b/egs/multi_cn/s5/RESULTS
index 16b50c61cdb..0b9f652a2ff 100644
--- a/egs/multi_cn/s5/RESULTS
+++ b/egs/multi_cn/s5/RESULTS
@@ -6,8 +6,8 @@
%WER 19.03 [ 19941 / 104765, 725 ins, 1222 del, 17994 sub ] exp/tri3a/decode_aishell_test_tg/cer_13_0.5
%WER 21.68 [ 22710 / 104765, 902 ins, 2361 del, 19447 sub ] exp/tri4a/decode_aishell_test_tg/cer_14_0.0
%WER 16.64 [ 17436 / 104765, 857 ins, 706 del, 15873 sub ] exp/tri4a_cleaned/decode_aishell_test_tg/cer_14_0.5
-%WER 6.01 [ 6299 / 104765, 129 ins, 175 del, 5995 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_aishell_tg/cer_11_1.0
-%WER 6.01 [ 6298 / 104765, 128 ins, 176 del, 5994 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_aishell_tg/cer_11_1.0
+%WER 5.90 [ 6176 / 104765, 119 ins, 169 del, 5888 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_aishell_tg/cer_11_1.0
+%WER 5.90 [ 6177 / 104765, 121 ins, 168 del, 5888 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_aishell_tg/cer_11_1.0
# aidatatang test set results
%WER 33.86 [ 158799 / 468933, 3856 ins, 33811 del, 121132 sub ] exp/tri1b/decode_aidatatang_test_tg/cer_14_0.0
@@ -15,8 +15,8 @@
%WER 23.67 [ 111009 / 468933, 4535 ins, 19118 del, 87356 sub ] exp/tri3a/decode_aidatatang_test_tg/cer_14_0.0
%WER 20.01 [ 93829 / 468933, 4563 ins, 16970 del, 72296 sub ] exp/tri4a/decode_aidatatang_test_tg/cer_15_0.0
%WER 17.85 [ 83717 / 468933, 6506 ins, 13716 del, 63495 sub ] exp/tri4a_cleaned/decode_aidatatang_test_tg/cer_15_0.0
-%WER 4.99 [ 23403 / 468933, 1954 ins, 3371 del, 18078 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_aidatatang_tg/cer_11_0.0
-%WER 4.99 [ 23385 / 468933, 1965 ins, 3356 del, 18064 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_aidatatang_tg/cer_11_0.0
+%WER 4.98 [ 23370 / 468933, 2190 ins, 3188 del, 17992 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_aidatatang_tg/cer_10_0.0
+%WER 4.98 [ 23371 / 468933, 2224 ins, 3171 del, 17976 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_aidatatang_tg/cer_10_0.0
# magicdata test set results
%WER 27.01 [ 64815 / 239927, 4838 ins, 14852 del, 45125 sub ] exp/tri1b/decode_magicdata_test_tg/cer_17_0.0
@@ -24,8 +24,8 @@
%WER 22.42 [ 53784 / 239927, 6513 ins, 7409 del, 39862 sub ] exp/tri3a/decode_magicdata_test_tg/cer_17_0.0
%WER 15.45 [ 37076 / 239927, 3942 ins, 5217 del, 27917 sub ] exp/tri4a/decode_magicdata_test_tg/cer_17_0.0
%WER 13.99 [ 33568 / 239927, 6267 ins, 3705 del, 23596 sub ] exp/tri4a_cleaned/decode_magicdata_test_tg/cer_17_0.5
-%WER 4.21 [ 10112 / 239927, 1443 ins, 1927 del, 6742 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_magicdata_tg/cer_11_0.5
-%WER 4.23 [ 10158 / 239927, 1299 ins, 2032 del, 6827 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_magicdata_tg/cer_11_1.0
+%WER 4.24 [ 10180 / 239927, 1405 ins, 2001 del, 6774 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_magicdata_tg/cer_11_1.0
+%WER 4.25 [ 10188 / 239927, 1428 ins, 1997 del, 6763 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_magicdata_tg/cer_11_1.0
# thchs test set results
%WER 35.75 [ 29005 / 81139, 353 ins, 1824 del, 26828 sub ] exp/tri1b/decode_thchs_test_tg/cer_10_1.0
@@ -33,8 +33,8 @@
%WER 30.26 [ 24549 / 81139, 328 ins, 1412 del, 22809 sub ] exp/tri3a/decode_thchs_test_tg/cer_10_1.0
%WER 27.67 [ 22449 / 81139, 410 ins, 1102 del, 20937 sub ] exp/tri4a/decode_thchs_test_tg/cer_10_0.5
%WER 25.41 [ 20615 / 81139, 399 ins, 847 del, 19369 sub ] exp/tri4a_cleaned/decode_thchs_test_tg/cer_11_0.5
-%WER 13.02 [ 10561 / 81139, 134 ins, 261 del, 10166 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_thchs_tg/cer_9_1.0
-%WER 13.00 [ 10552 / 81139, 132 ins, 259 del, 10161 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_thchs_tg/cer_9_1.0
+%WER 12.96 [ 10514 / 81139, 120 ins, 300 del, 10094 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp/decode_thchs_tg/cer_10_1.0
+%WER 12.94 [ 10499 / 81139, 120 ins, 299 del, 10080 sub ] exp/chain_cleaned/tdnn_cnn_1a_sp_online/decode_thchs_tg/cer_10_1.0
# GMM results w/ corpus LM
# ./run.sh --stage 17 --corpus-lm true
diff --git a/egs/multi_cn/s5/local/aidatatang_data_prep.sh b/egs/multi_cn/s5/local/aidatatang_data_prep.sh
index 518a0e99866..93898338722 100755
--- a/egs/multi_cn/s5/local/aidatatang_data_prep.sh
+++ b/egs/multi_cn/s5/local/aidatatang_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/aidatatang_download_and_untar.sh b/egs/multi_cn/s5/local/aidatatang_download_and_untar.sh
index a2616ba0e20..2cbf88f8190 100755
--- a/egs/multi_cn/s5/local/aidatatang_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/aidatatang_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/multi_cn/s5/local/aishell2_data_prep.sh b/egs/multi_cn/s5/local/aishell2_data_prep.sh
new file mode 100755
index 00000000000..d281083cb04
--- /dev/null
+++ b/egs/multi_cn/s5/local/aishell2_data_prep.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
+# 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
+# Apache 2.0
+
+# This script is copied from aishell2/s5/local/prepare_data.sh
+# but using difference word segmentation script.
+
+# transform raw AISHELL-2 data to kaldi format
+
+. ./path.sh || exit 1;
+
+tmp=
+dir=
+
+if [ $# != 2 ]; then
+ echo "Usage: $0 "
+ echo " $0 /export/AISHELL-2/iOS/train data/train"
+ exit 1;
+fi
+
+corpus=$1
+dir=$2/train
+tmp=$2/tmp
+
+echo "prepare_data.sh: Preparing data in $corpus"
+
+mkdir -p $tmp
+mkdir -p $dir
+
+# corpus check
+if [ ! -d $corpus ] || [ ! -f $corpus/wav.scp ] || [ ! -f $corpus/trans.txt ]; then
+ echo "Error: $0 requires wav.scp and trans.txt under $corpus directory."
+ exit 1;
+fi
+
+# validate utt-key list
+awk '{print $1}' $corpus/wav.scp > $tmp/wav_utt.list
+awk '{print $1}' $corpus/trans.txt > $tmp/trans_utt.list
+utils/filter_scp.pl -f 1 $tmp/wav_utt.list $tmp/trans_utt.list > $tmp/utt.list
+
+# wav.scp
+awk -F'\t' -v path_prefix=$corpus '{printf("%s\t%s/%s\n",$1,path_prefix,$2)}' $corpus/wav.scp > $tmp/tmp_wav.scp
+utils/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_wav.scp | sort -k 1 | uniq > $tmp/wav.scp
+
+# text
+dos2unix < $corpus/trans.txt | \
+ utils/filter_scp.pl -f 1 $tmp/utt.list - | \
+ sort -k 1 | uniq | tr '[a-z]' '[A-Z]' | \
+ local/word_segment.py > $tmp/text
+
+# utt2spk & spk2utt
+awk -F'\t' '{print $2}' $tmp/wav.scp > $tmp/wav.list
+sed -e 's:\.wav::g' $tmp/wav.list | \
+ awk -F'/' '{i=NF-1;printf("%s\t%s\n",$NF,$i)}' > $tmp/tmp_utt2spk
+utils/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_utt2spk | sort -k 1 | uniq > $tmp/utt2spk
+utils/utt2spk_to_spk2utt.pl $tmp/utt2spk | sort -k 1 | uniq > $tmp/spk2utt
+
+# copy prepared resources from tmp_dir to target dir
+mkdir -p $dir
+for f in wav.scp text spk2utt utt2spk; do
+ cp $tmp/$f $dir/$f || exit 1;
+done
+
+utils/data/validate_data_dir.sh --no-feats $dir || exit 1;
+echo "local/prepare_data.sh succeeded"
+exit 0;
diff --git a/egs/multi_cn/s5/local/aishell_data_prep.sh b/egs/multi_cn/s5/local/aishell_data_prep.sh
index 7896e208f33..25f75fc1ae2 100755
--- a/egs/multi_cn/s5/local/aishell_data_prep.sh
+++ b/egs/multi_cn/s5/local/aishell_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/aishell_download_and_untar.sh b/egs/multi_cn/s5/local/aishell_download_and_untar.sh
index e251a9aae2f..74a8e36cf2a 100755
--- a/egs/multi_cn/s5/local/aishell_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/aishell_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/multi_cn/s5/local/chain/compare_cer.sh b/egs/multi_cn/s5/local/chain/compare_cer.sh
new file mode 100755
index 00000000000..3daa43834a6
--- /dev/null
+++ b/egs/multi_cn/s5/local/chain/compare_cer.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+
+# This script is modified from egs/librispeech/s5/local/chain/compare_wer.sh
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_cer.sh exp/chain/tdnn_{c,d}_sp
+# For use with discriminatively trained systems you specify the epochs after a colon:
+# for instance,
+# local/chain/compare_cer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}
+
+
+if [ $# == 0 ]; then
+ echo "Usage: $0: [--online] [ ... ]"
+ echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
+ echo "or (with epoch numbers for discriminative training):"
+ echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
+ exit 1
+fi
+
+echo "# $0 $*"
+
+include_online=false
+if [ "$1" == "--online" ]; then
+ include_online=true
+ shift
+fi
+
+
+used_epochs=false
+
+# this function set_names is used to separate the epoch-related parts of the name
+# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like:
+# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
+# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
+# If called with something like:
+# set_names exp/chain/tdnn_d_sp_smbr:3
+# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"
+
+
+set_names() {
+ if [ $# != 1 ]; then
+ echo "compare_cer.sh: internal error"
+ exit 1 # exit the program
+ fi
+ dirname=$(echo $1 | cut -d: -f1)
+ epoch=$(echo $1 | cut -s -d: -f2)
+ if [ -z $epoch ]; then
+ epoch_infix=""
+ else
+ used_epochs=true
+ epoch_infix=_epoch${epoch}
+ fi
+}
+
+
+
+echo -n "# System "
+for x in $*; do printf "% 10s" " $(basename $x)"; done
+echo
+
+strings=(
+ "# CER on aidatatang(tg) "
+ "# CER on aishell(tg) "
+ "# CER on magicdata(tg) "
+ "# CER on thchs30(tg) ")
+
+for n in 0 1 2 3; do
+ echo -n "${strings[$n]}"
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ decode_names=(aidatatang_tg aishell_tg magicdata_tg thchs_tg)
+
+ wer=$(grep WER $dirname/decode_${decode_names[$n]}/cer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ if $include_online; then
+ echo -n "# [online:] "
+ for x in $*; do
+ set_names $x # sets $dirname and $epoch_infix
+ wer=$(grep WER ${dirname}_online/decode_${decode_names[$n]}/cer_* | utils/best_wer.sh | awk '{print $2}')
+ printf "% 10s" $wer
+ done
+ echo
+ fi
+done
+
+echo -n "# Final train prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (xent) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (xent) "
+for x in $*; do
+ prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+ printf "% 10s" $prob
+done
+echo
+
+echo -n "# Num-parameters "
+for x in $*; do
+ num_params=$(grep num-parameters $x/log/progress.1.log | awk '{print $2}')
+ printf "% 10d" $num_params
+done
+echo
diff --git a/egs/multi_cn/s5/local/chain/run_chain_common.sh b/egs/multi_cn/s5/local/chain/run_chain_common.sh
index 2f57c4765cf..4b00784f32a 100755
--- a/egs/multi_cn/s5/local/chain/run_chain_common.sh
+++ b/egs/multi_cn/s5/local/chain/run_chain_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script has common stages shared across librispeech chain recipes.
# It generates a new topology in a new lang directory, gets the alignments as
diff --git a/egs/multi_cn/s5/local/chain/run_ivector_common.sh b/egs/multi_cn/s5/local/chain/run_ivector_common.sh
index 5a09d44a79b..b52b6945a40 100755
--- a/egs/multi_cn/s5/local/chain/run_ivector_common.sh
+++ b/egs/multi_cn/s5/local/chain/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
@@ -75,29 +75,22 @@ if [ $stage -le 3 ]; then
# do volume-perturbation on the training data prior to extracting hires
# features; this helps make trained nnets more invariant to test data volume.
- # create MFCC data dir without pitch to extract iVector
utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires
- steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+ steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/${train_set}_sp_hires || exit 1;
steps/compute_cmvn_stats.sh data/${train_set}_sp_hires || exit 1;
utils/fix_data_dir.sh data/${train_set}_sp_hires
- utils/data/limit_feature_dim.sh 0:39 \
- data/${train_set}_sp_hires data/${train_set}_sp_hires_nopitch || exit 1;
- steps/compute_cmvn_stats.sh data/${train_set}_sp_hires_nopitch || exit 1;
for datadir in $test_sets; do
- steps/make_mfcc_pitch_online.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
+ steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/$datadir/test_hires || exit 1;
steps/compute_cmvn_stats.sh data/$datadir/test_hires || exit 1;
utils/fix_data_dir.sh data/$datadir/test_hires
- utils/data/limit_feature_dim.sh 0:39 \
- data/$datadir/test_hires data/$datadir/test_hires_nopitch || exit 1;
- steps/compute_cmvn_stats.sh data/$datadir/test_hires_nopitch || exit 1;
done
# now create a data subset. 60k is 1/5th of the training dataset (around 200 hours).
- utils/subset_data_dir.sh data/${train_set}_sp_hires_nopitch 60000 \
- data/${train_set}_sp_hires_nopitch_60k
+ utils/subset_data_dir.sh data/${train_set}_sp_hires 60000 \
+ data/${train_set}_sp_hires_60k
fi
@@ -107,16 +100,16 @@ if [ $stage -le 4 ]; then
mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
- num_utts_total=$(wc -l $dir/configs/network.xconfig
input dim=100 name=ivector
- input dim=43 name=input
+ input dim=40 name=input
# MFCC to filterbank
idct-layer name=idct input=input dim=40 cepstral-lifter=22 affine-transform-file=$dir/configs/idct.mat
@@ -237,7 +232,7 @@ if $test_online_decoding && [ $stage -le 18 ]; then
# note: if the features change (e.g. you add pitch features), you will have to
# change the options of the following command line.
steps/online/nnet3/prepare_online_decoding.sh \
- --mfcc-config conf/mfcc_hires.conf --add-pitch true \
+ --mfcc-config conf/mfcc_hires.conf \
$lang exp/nnet3${nnet3_affix}/extractor $dir ${dir}_online
rm $dir/.error 2>/dev/null || true
diff --git a/egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1b.sh b/egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
index e3b8fa71175..dd9a238548a 100755
--- a/egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
+++ b/egs/multi_cn/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
@@ -1,23 +1,10 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script is copied from mini_librispeech/s5
# 1b is as 1a but adding SpecAugment and removing dropout (which, in
# combination with SpecAugment, no longer seemed to give an improvement).
-# local/chain/compare_wer.sh --online exp/chain/cnn_tdnn1{a,a2,b,b2}_sp
-# System cnn_tdnn1a_sp cnn_tdnn1a2_sp cnn_tdnn1b_sp cnn_tdnn1b2_sp
-#WER dev_clean_2 (tgsmall) 10.89 10.96 10.04 9.93
-# [online:] 10.91 10.93 9.99 9.99
-#WER dev_clean_2 (tglarge) 7.50 7.80 6.94 6.89
-# [online:] 7.58 7.84 6.97 7.04
-# Final train prob -0.0476 -0.0470 -0.0577 -0.0575
-# Final valid prob -0.0754 -0.0760 -0.0742 -0.0746
-# Final train prob (xent) -1.0930 -1.0995 -1.3090 -1.3043
-# Final valid prob (xent) -1.2916 -1.2904 -1.4242 -1.4225
-# Num-params 4492816 4492816 4492816 4492816
-
-
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
@@ -26,7 +13,6 @@ set -euo pipefail
stage=0
decode_nj=10
train_set=train_all_cleaned
-test_sets=""
gmm=tri4a_cleaned
nnet3_affix=_cleaned
@@ -50,6 +36,7 @@ remove_egs=true
reporting_email=
# decode options
+test_sets=""
test_online_decoding=true # if true, it will run the last decoding stage.
@@ -120,7 +107,7 @@ if [ $stage -le 14 ]; then
mkdir -p $dir/configs
cat < $dir/configs/network.xconfig
input dim=100 name=ivector
- input dim=43 name=input
+ input dim=40 name=input
# this takes the MFCCs and generates filterbank coefficients. The MFCCs
# are more compressible so we prefer to dump the MFCCs to disk rather
@@ -236,7 +223,7 @@ if $test_online_decoding && [ $stage -le 18 ]; then
# note: if the features change (e.g. you add pitch features), you will have to
# change the options of the following command line.
steps/online/nnet3/prepare_online_decoding.sh \
- --mfcc-config conf/mfcc_hires.conf --add-pitch true \
+ --mfcc-config conf/mfcc_hires.conf \
$lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online
rm $dir/.error 2>/dev/null || true
diff --git a/egs/multi_cn/s5/local/magicdata_data_prep.sh b/egs/multi_cn/s5/local/magicdata_data_prep.sh
index f8d47716751..4c96a40c9b6 100755
--- a/egs/multi_cn/s5/local/magicdata_data_prep.sh
+++ b/egs/multi_cn/s5/local/magicdata_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/magicdata_download_and_untar.sh b/egs/multi_cn/s5/local/magicdata_download_and_untar.sh
index df8ca8d2296..c322edc98cf 100755
--- a/egs/multi_cn/s5/local/magicdata_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/magicdata_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2019 Xingyu Na
diff --git a/egs/multi_cn/s5/local/prepare_dict.sh b/egs/multi_cn/s5/local/prepare_dict.sh
index 6b160b60580..3a86e160cf1 100755
--- a/egs/multi_cn/s5/local/prepare_dict.sh
+++ b/egs/multi_cn/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is copied from egs/hkust/s5/local/hkust_prepare_dict.sh
@@ -316,7 +316,7 @@ cat $dict_dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ",
# Add to the lexicon the silences, noises etc.
(echo '!SIL SIL'; echo '[SPK] SPN'; echo '[FIL] NSN'; echo ' SPN' ) | \
- cat - $dict_dir/lexicon1.txt > $dict_dir/lexicon.txt || exit 1;
+ cat - $dict_dir/lexicon1.txt | sed '/^HH$/d' > $dict_dir/lexicon.txt || exit 1;
echo "$0: dict preparation succeeded"
exit 0;
diff --git a/egs/multi_cn/s5/local/primewords_data_prep.sh b/egs/multi_cn/s5/local/primewords_data_prep.sh
index bcf3b6698a4..9ce4140f801 100755
--- a/egs/multi_cn/s5/local/primewords_data_prep.sh
+++ b/egs/multi_cn/s5/local/primewords_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/primewords_download_and_untar.sh b/egs/multi_cn/s5/local/primewords_download_and_untar.sh
index 7e716c7a0a6..5828f1e2d7e 100755
--- a/egs/multi_cn/s5/local/primewords_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/primewords_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/multi_cn/s5/local/run_cleanup_segmentation.sh b/egs/multi_cn/s5/local/run_cleanup_segmentation.sh
index f1ea4a2f574..ea93ab97386 100755
--- a/egs/multi_cn/s5/local/run_cleanup_segmentation.sh
+++ b/egs/multi_cn/s5/local/run_cleanup_segmentation.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vimal Manohar
# 2016 Yiming Wang
diff --git a/egs/multi_cn/s5/local/score.sh b/egs/multi_cn/s5/local/score.sh
index a9786169973..d283ceb68dc 100755
--- a/egs/multi_cn/s5/local/score.sh
+++ b/egs/multi_cn/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e -o pipefail
set -x
diff --git a/egs/multi_cn/s5/local/stcmds_data_prep.sh b/egs/multi_cn/s5/local/stcmds_data_prep.sh
index 6375d0d9a1b..a751729ef91 100755
--- a/egs/multi_cn/s5/local/stcmds_data_prep.sh
+++ b/egs/multi_cn/s5/local/stcmds_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2019 Xingyu Na
# Apache 2.0
diff --git a/egs/multi_cn/s5/local/stcmds_download_and_untar.sh b/egs/multi_cn/s5/local/stcmds_download_and_untar.sh
index ca89b5a292a..37379ab28e6 100755
--- a/egs/multi_cn/s5/local/stcmds_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/stcmds_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# 2017 Xingyu Na
diff --git a/egs/multi_cn/s5/local/thchs-30_data_prep.sh b/egs/multi_cn/s5/local/thchs-30_data_prep.sh
index 8f48133a1dd..2d3af9fdaba 100755
--- a/egs/multi_cn/s5/local/thchs-30_data_prep.sh
+++ b/egs/multi_cn/s5/local/thchs-30_data_prep.sh
@@ -1,8 +1,8 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Tsinghua University (Author: Dong Wang, Xuewei Zhang). Apache 2.0.
# 2016 LeSpeech (Author: Xingyu Na)
-#This script pepares the data directory for thchs30 recipe.
+#This script pepares the data directory for thchs30 recipe.
#It reads the corpus and get wav.scp and transcriptions.
corpus_dir=$1
@@ -23,13 +23,13 @@ for x in train dev test; do
spkid=`echo $nn | awk -F"_" '{print "" $1}'`
spk_char=`echo $spkid | sed 's/\([A-Z]\).*/\1/'`
spk_num=`echo $spkid | sed 's/[A-Z]\([0-9]\)/\1/'`
- spkid=$(printf '%s%.2d' "$spk_char" "$spk_num")
+ spkid=$(printf 'TH%s%.2d' "$spk_char" "$spk_num")
utt_num=`echo $nn | awk -F"_" '{print $2}'`
- uttid=$(printf '%s%.2d_%.3d' "$spk_char" "$spk_num" "$utt_num")
+ uttid=$(printf 'TH%s%.2d-%.3d' "$spk_char" "$spk_num" "$utt_num")
echo $uttid $corpus_dir/$x/$nn.wav >> $part/wav.scp
echo $uttid $spkid >> $part/utt2spk
echo $uttid `sed -n 1p $corpus_dir/data/$nn.wav.trn` | sed 's/ l =//' >> $part/text
- done
+ done
sort $part/wav.scp -o $part/wav.scp
sort $part/utt2spk -o $part/utt2spk
sort $part/text -o $part/text
@@ -40,6 +40,3 @@ done
utils/data/validate_data_dir.sh --no-feats $data/train || exit 1;
utils/data/validate_data_dir.sh --no-feats $data/dev || exit 1;
utils/data/validate_data_dir.sh --no-feats $data/test || exit 1;
-
-
-
diff --git a/egs/multi_cn/s5/local/thchs_download_and_untar.sh b/egs/multi_cn/s5/local/thchs_download_and_untar.sh
index 6294fca7d9b..878e29a80ed 100755
--- a/egs/multi_cn/s5/local/thchs_download_and_untar.sh
+++ b/egs/multi_cn/s5/local/thchs_download_and_untar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2014 Johns Hopkins University (author: Daniel Povey)
# Copyright 2016 Tsinghua University (author: Dong Wang)
diff --git a/egs/multi_cn/s5/local/train_corpus_lm.sh b/egs/multi_cn/s5/local/train_corpus_lm.sh
index 181ff4c5522..fe464d41520 100755
--- a/egs/multi_cn/s5/local/train_corpus_lm.sh
+++ b/egs/multi_cn/s5/local/train_corpus_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
@@ -42,13 +42,13 @@ cat $text | awk '{$1=""; print substr($0, 2)}' | awk -v lex=$lexicon 'BEGIN{whil
{for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \
> $cleantext || exit 1;
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | sort | uniq -c | \
sort -nr > $dir/word.counts || exit 1;
# Get counts from acoustic training transcripts, and add one-count
# for each word in the lexicon (but not silence, we don't want it
# in the LM-- we'll add it optionally later).
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | \
cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
@@ -58,7 +58,7 @@ cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "0)map[$1]=$2;}
- { for(n=2;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \
+ { for(n=1;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \
|| exit 1;
if [ `wc -l < $cleantext` -le 10000 ]; then
diff --git a/egs/multi_cn/s5/local/train_lms.sh b/egs/multi_cn/s5/local/train_lms.sh
index ac632538ec5..c0dfc46a298 100755
--- a/egs/multi_cn/s5/local/train_lms.sh
+++ b/egs/multi_cn/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from one directory above this script.
@@ -29,17 +29,18 @@ fi
cleantext=$dir/text.no_oov
+# note: ignore 1st field of text, it's the utterance-id.
cat $text | awk '{$1=""; print substr($0, 2)}' | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } }
{for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \
> $cleantext || exit 1;
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | sort | uniq -c | \
sort -nr > $dir/word.counts || exit 1;
# Get counts from acoustic training transcripts, and add one-count
# for each word in the lexicon (but not silence, we don't want it
# in the LM-- we'll add it optionally later).
-cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+cat $cleantext | awk '{for(n=1;n<=NF;n++) print $n; }' | \
cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
@@ -47,9 +48,8 @@ cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map \
|| exit 1;
-# note: ignore 1st field of train.txt, it's the utterance-id.
cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline0)map[$1]=$2;}
- { for(n=2;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \
+ { for(n=1;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz \
|| exit 1;
train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1;
diff --git a/egs/multi_cn/s5/run.sh b/egs/multi_cn/s5/run.sh
index bd03355ea61..3fb48e72263 100755
--- a/egs/multi_cn/s5/run.sh
+++ b/egs/multi_cn/s5/run.sh
@@ -1,13 +1,14 @@
-#!/bin/bash
+#!/usr/bin/env bash
-# Copyright 2019 Microsoft Corporation (authors: Xingyu Na)
+# Copyright 2019-2020 Microsoft Corporation (authors: Xingyu Na)
# Apache 2.0
. ./cmd.sh
. ./path.sh
stage=0
-dbase=/mnt/data/openslr
+dbase=/mnt/data/openslr # it is recommanded practice to provide absolute path here,
+ # otherwise some data downloading scripts might break.
aidatatang_url=www.openslr.org/resources/62
aishell_url=www.openslr.org/resources/33
magicdata_url=www.openslr.org/resources/68
@@ -18,6 +19,9 @@ thchs_url=www.openslr.org/resources/18
test_sets="aishell aidatatang magicdata thchs"
corpus_lm=false # interpolate with corpus lm
+has_aishell2=false # AISHELL2 train set is not publically downloadable
+ # with this option true, the script assumes you have it in $dbase
+
. utils/parse_options.sh
if [ $stage -le 0 ]; then
@@ -42,12 +46,21 @@ if [ $stage -le 1 ]; then
local/magicdata_data_prep.sh $dbase/magicdata data/magicdata || exit 1;
local/primewords_data_prep.sh $dbase/primewords data/primewords || exit 1;
local/stcmds_data_prep.sh $dbase/stcmds data/stcmds || exit 1;
+ if $has_aishell2; then
+ local/aishell2_data_prep.sh $dbase/aishell2/iOS/data data/aishell2 || exit 1;
+ fi
fi
if [ $stage -le 2 ]; then
# normalize transcripts
utils/combine_data.sh data/train_combined \
data/{aidatatang,aishell,magicdata,primewords,stcmds,thchs}/train || exit 1;
+ if $has_aishell2; then
+ mv data/train_combined data/train_combined_tmp
+ utils/combine_data.sh data/train_combined \
+ data/train_combined_tmp data/aishell2/train || exit 1;
+ rm -rf data/train_combined_tmp
+ fi
utils/combine_data.sh data/test_combined \
data/{aidatatang,aishell,magicdata,thchs}/{dev,test} || exit 1;
local/prepare_dict.sh || exit 1;
@@ -89,6 +102,12 @@ if [ $stage -le 5 ]; then
) &
done
wait
+ if $has_aishell2; then
+ steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 20 \
+ data/aishell2/train exp/make_mfcc/aishell2/train $mfccdir/aishell2 || exit 1;
+ steps/compute_cmvn_stats.sh data/aishell2/train \
+ exp/make_mfcc/aishell2/train $mfccdir/aishell2 || exit 1;
+ fi
fi
if [ $stage -le 6 ]; then
@@ -205,6 +224,12 @@ if [ $stage -le 14 ]; then
# train tri4a using all
utils/combine_data.sh data/train_all \
data/{aidatatang,aishell,magicdata,primewords,stcmds,thchs}/train || exit 1;
+ if $has_aishell2; then
+ mv data/train_all data/train_all_tmp
+ utils/combine_data.sh data/train_all \
+ data/train_all_tmp data/aishell2/train || exit 1;
+ rm -rf data/train_all_tmp
+ fi
steps/align_fmllr.sh --cmd "$train_cmd" --nj 100 \
data/train_all data/lang exp/tri3a exp/tri3a_ali || exit 1;
diff --git a/egs/multi_en/s5/local/ami_ihm_data_prep.sh b/egs/multi_en/s5/local/ami_ihm_data_prep.sh
index 55f8bb22d41..7c202438e6f 100755
--- a/egs/multi_en/s5/local/ami_ihm_data_prep.sh
+++ b/egs/multi_en/s5/local/ami_ihm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/ami/s5/local/ami_ihm_data_prep.sh
@@ -78,7 +78,7 @@ sed -e 's?.*/??' -e 's?.wav??' $dir/wav.flist | \
awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp
#replace path with an appropriate sox command that select single channel only
-awk '{print $1" sox -c 1 -t wavpcm -s "$2" -r 8000 -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp
+awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -r 8000 -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp
# (1d) reco2file_and_channel
cat $dir/wav.scp \
diff --git a/egs/multi_en/s5/local/ami_sdm_data_prep.sh b/egs/multi_en/s5/local/ami_sdm_data_prep.sh
index a5d55640d1e..282887b0341 100755
--- a/egs/multi_en/s5/local/ami_sdm_data_prep.sh
+++ b/egs/multi_en/s5/local/ami_sdm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/ami/s5/local/ami_sdm_data_prep.sh
diff --git a/egs/multi_en/s5/local/ami_text_prep.sh b/egs/multi_en/s5/local/ami_text_prep.sh
index fb769a0c019..72e8e770e50 100755
--- a/egs/multi_en/s5/local/ami_text_prep.sh
+++ b/egs/multi_en/s5/local/ami_text_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/ami/s5/local/ami_text_prep.sh
diff --git a/egs/multi_en/s5/local/ami_xml2text.sh b/egs/multi_en/s5/local/ami_xml2text.sh
index 49ce740d44f..fd8b496dab6 100755
--- a/egs/multi_en/s5/local/ami_xml2text.sh
+++ b/egs/multi_en/s5/local/ami_xml2text.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/ami/s5/local/ami_xml2text.sh
diff --git a/egs/multi_en/s5/local/chain/run_blstm_6h.sh b/egs/multi_en/s5/local/chain/run_blstm_6h.sh
index 126d29350a1..8840b3b188e 100644
--- a/egs/multi_en/s5/local/chain/run_blstm_6h.sh
+++ b/egs/multi_en/s5/local/chain/run_blstm_6h.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh.
diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh
index 96f5fdac8f3..40979b4fd5b 100755
--- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh
+++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Xiaohui Zhang
# 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index 62266334962..8a6371d2f44 100755
--- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# 2018 Xiaohui Zhang
# 2018 Vimal Manohar
@@ -107,7 +107,7 @@ lang=data/${multi}/lang_${gmm}_chain
lang_dir=data/lang_${multi}_${gmm}_fsh_sw1_tg
rescore_lang_dir=data/lang_${multi}_${gmm}_fsh_sw1_fg
-local/nnet3/run_ivector_common.sh --stage $stage --nnet3-affix "$nnet3_affix" \
+local/nnet3/run_ivector_common.sh --stage $stage \
--multi $multi \
--gmm $gmm \
--speed-perturb $speed_perturb || exit 1
diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh
index 79cd3eb3014..3ac4078d507 100755
--- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh
+++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Xiaohui Zhang
# 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh
index a7170af9431..6e705fa8724 100755
--- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh
+++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Xiaohui Zhang
# 2017 University of Chinese Academy of Sciences (UCAS) Gaofeng Cheng
# Apache 2.0
diff --git a/egs/multi_en/s5/local/cmu_tedlium_prepare_dict.sh b/egs/multi_en/s5/local/cmu_tedlium_prepare_dict.sh
index 972ce491326..16fa98ded9d 100755
--- a/egs/multi_en/s5/local/cmu_tedlium_prepare_dict.sh
+++ b/egs/multi_en/s5/local/cmu_tedlium_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# 2017 Xiaohui Zhang
diff --git a/egs/multi_en/s5/local/eval2000_data_prep.sh b/egs/multi_en/s5/local/eval2000_data_prep.sh
index cf6b6a78580..68bd015d8b6 100755
--- a/egs/multi_en/s5/local/eval2000_data_prep.sh
+++ b/egs/multi_en/s5/local/eval2000_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/eval2000_data_prep.sh
diff --git a/egs/multi_en/s5/local/fisher_data_prep.sh b/egs/multi_en/s5/local/fisher_data_prep.sh
index cf46954575c..174d86f9a17 100755
--- a/egs/multi_en/s5/local/fisher_data_prep.sh
+++ b/egs/multi_en/s5/local/fisher_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/fisher_data_prep.sh
diff --git a/egs/multi_en/s5/local/hub4_96_data_prep.sh b/egs/multi_en/s5/local/hub4_96_data_prep.sh
index f258ea7b7f5..144f88cffca 100755
--- a/egs/multi_en/s5/local/hub4_96_data_prep.sh
+++ b/egs/multi_en/s5/local/hub4_96_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/hub4_english/s5/local/data_prep/prepare_1996_bn_data.sh
@@ -6,7 +6,7 @@
# Changes in lower level script/dir names were made
###########################################################################################
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# 2017 Vimal Manohar
# License: Apache 2.0
diff --git a/egs/multi_en/s5/local/hub4_97_data_prep.sh b/egs/multi_en/s5/local/hub4_97_data_prep.sh
index 096c2142c36..86b9482e4c4 100755
--- a/egs/multi_en/s5/local/hub4_97_data_prep.sh
+++ b/egs/multi_en/s5/local/hub4_97_data_prep.sh
@@ -4,7 +4,7 @@
# No change was made
###########################################################################################
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2017, Johns Hopkins University (Jan "Yenda" Trmal)
# 2017 Vimal Manohar
# License: Apache 2.0
diff --git a/egs/multi_en/s5/local/hub4_en_data_prep.sh b/egs/multi_en/s5/local/hub4_en_data_prep.sh
index e8173111038..6034e012701 100755
--- a/egs/multi_en/s5/local/hub4_en_data_prep.sh
+++ b/egs/multi_en/s5/local/hub4_en_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 1996/1997 English Broadcast News training data preparation (HUB4)
diff --git a/egs/multi_en/s5/local/librispeech_data_prep.sh b/egs/multi_en/s5/local/librispeech_data_prep.sh
index b34072a4f61..9512fe70b65 100755
--- a/egs/multi_en/s5/local/librispeech_data_prep.sh
+++ b/egs/multi_en/s5/local/librispeech_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/librispeech/s5/local/data_prep.sh
diff --git a/egs/multi_en/s5/local/librispeech_lm_decode.sh b/egs/multi_en/s5/local/librispeech_lm_decode.sh
index 7e79c788636..563f870b721 100755
--- a/egs/multi_en/s5/local/librispeech_lm_decode.sh
+++ b/egs/multi_en/s5/local/librispeech_lm_decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# Apache 2.0
diff --git a/egs/multi_en/s5/local/make_partitions.sh b/egs/multi_en/s5/local/make_partitions.sh
index 74f23ae9746..167c5086368 100755
--- a/egs/multi_en/s5/local/make_partitions.sh
+++ b/egs/multi_en/s5/local/make_partitions.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# 2017 Xiaohui Zhang
diff --git a/egs/multi_en/s5/local/nnet3/run_ivector_common.sh b/egs/multi_en/s5/local/nnet3/run_ivector_common.sh
index d36cb0e6083..1060d101aec 100755
--- a/egs/multi_en/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/multi_en/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
diff --git a/egs/multi_en/s5/local/nnet3/run_tdnn.sh b/egs/multi_en/s5/local/nnet3/run_tdnn.sh
index 266c4af52d7..b8441c99009 100755
--- a/egs/multi_en/s5/local/nnet3/run_tdnn.sh
+++ b/egs/multi_en/s5/local/nnet3/run_tdnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/nnet3/run_tdnn.sh
diff --git a/egs/multi_en/s5/local/prepare_dict.sh b/egs/multi_en/s5/local/prepare_dict.sh
index 8bf54a3dddc..8501b14de10 100755
--- a/egs/multi_en/s5/local/prepare_dict.sh
+++ b/egs/multi_en/s5/local/prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Intellisist, Inc. (Author: Navneeth K)
# 2017 Xiaohui Zhang
diff --git a/egs/multi_en/s5/local/rt03_data_prep.sh b/egs/multi_en/s5/local/rt03_data_prep.sh
index aa1e2ba4cc2..ea252b7753a 100755
--- a/egs/multi_en/s5/local/rt03_data_prep.sh
+++ b/egs/multi_en/s5/local/rt03_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/rt03_data_prep.sh
diff --git a/egs/multi_en/s5/local/score.sh b/egs/multi_en/s5/local/score.sh
index cada400acda..9bc36b3197c 100755
--- a/egs/multi_en/s5/local/score.sh
+++ b/egs/multi_en/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/score.sh
diff --git a/egs/multi_en/s5/local/score_sclite.sh b/egs/multi_en/s5/local/score_sclite.sh
index 07dd63950d5..2a10ba2d1cf 100755
--- a/egs/multi_en/s5/local/score_sclite.sh
+++ b/egs/multi_en/s5/local/score_sclite.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/score_sclite.sh
diff --git a/egs/multi_en/s5/local/swbd1_data_download.sh b/egs/multi_en/s5/local/swbd1_data_download.sh
index 0c28e480a60..aa85cd7176a 100755
--- a/egs/multi_en/s5/local/swbd1_data_download.sh
+++ b/egs/multi_en/s5/local/swbd1_data_download.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/swbd1_data_download.sh
@@ -44,7 +44,7 @@ if [ ! -d $SWBD_DIR/transcriptions/swb_ms98_transcriptions ]; then
if [ ! -d swb_ms98_transcriptions ]; then
echo " *** Downloading trascriptions and dictionary ***"
wget http://www.openslr.org/resources/5/switchboard_word_alignments.tar.gz ||
- wget http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz
+ wget -c http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz
tar -xf switchboard_word_alignments.tar.gz
fi
)
diff --git a/egs/multi_en/s5/local/swbd1_data_prep.sh b/egs/multi_en/s5/local/swbd1_data_prep.sh
index 4c1b6c7a9e6..551e870ae62 100755
--- a/egs/multi_en/s5/local/swbd1_data_prep.sh
+++ b/egs/multi_en/s5/local/swbd1_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/swbd1_data_prep.sh
diff --git a/egs/multi_en/s5/local/swbd1_prepare_dict.sh b/egs/multi_en/s5/local/swbd1_prepare_dict.sh
index 78e208f720d..87292bef94d 100755
--- a/egs/multi_en/s5/local/swbd1_prepare_dict.sh
+++ b/egs/multi_en/s5/local/swbd1_prepare_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/swbd/s5c/local/swbd1_prepare_dict.sh
diff --git a/egs/multi_en/s5/local/tedlium_lm_decode.sh b/egs/multi_en/s5/local/tedlium_lm_decode.sh
index 1df850648f8..e9755f47a1e 100755
--- a/egs/multi_en/s5/local/tedlium_lm_decode.sh
+++ b/egs/multi_en/s5/local/tedlium_lm_decode.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# Apache 2.0
diff --git a/egs/multi_en/s5/local/tedlium_prepare_data.sh b/egs/multi_en/s5/local/tedlium_prepare_data.sh
index 22f79b0b117..2f240c953ff 100755
--- a/egs/multi_en/s5/local/tedlium_prepare_data.sh
+++ b/egs/multi_en/s5/local/tedlium_prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/tedlium/s5_r2/local/prepare_data.sh
diff --git a/egs/multi_en/s5/local/train_lms.sh b/egs/multi_en/s5/local/train_lms.sh
index 02fd66e0368..302ff75c8ff 100755
--- a/egs/multi_en/s5/local/train_lms.sh
+++ b/egs/multi_en/s5/local/train_lms.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/fisher_swbd/s5/local/fisher_train_lms.sh
diff --git a/egs/multi_en/s5/local/wsj_data_prep.sh b/egs/multi_en/s5/local/wsj_data_prep.sh
index cc11f179eca..e42b9f09cd3 100755
--- a/egs/multi_en/s5/local/wsj_data_prep.sh
+++ b/egs/multi_en/s5/local/wsj_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/wsj/s5/local/wsj_data_prep.sh
diff --git a/egs/multi_en/s5/local/wsj_format_data.sh b/egs/multi_en/s5/local/wsj_format_data.sh
index 00ef0f49fd5..a54ad9c8d78 100755
--- a/egs/multi_en/s5/local/wsj_format_data.sh
+++ b/egs/multi_en/s5/local/wsj_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
###########################################################################################
# This script was copied from egs/wsj/s5/local/wsj_format_data.sh
diff --git a/egs/multi_en/s5/run.sh b/egs/multi_en/s5/run.sh
index 034ffeb4e66..229c0939d5c 100755
--- a/egs/multi_en/s5/run.sh
+++ b/egs/multi_en/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Allen Guo
# 2017 Xiaohui Zhang
diff --git a/egs/ptb/s5/local/rnnlm/download_ptb.sh b/egs/ptb/s5/local/rnnlm/download_ptb.sh
index 858e152bff9..129c90e10b0 100755
--- a/egs/ptb/s5/local/rnnlm/download_ptb.sh
+++ b/egs/ptb/s5/local/rnnlm/download_ptb.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. path.sh
data_dir=data/ptb
diff --git a/egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh b/egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh
index 3e3b6087495..be3b6cc6848 100755
--- a/egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh
+++ b/egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from the directory egs/ptb/s5.
diff --git a/egs/ptb/s5/local/rnnlm/train_backoff_lm.sh b/egs/ptb/s5/local/rnnlm/train_backoff_lm.sh
index 17bdab92878..1281693db10 100644
--- a/egs/ptb/s5/local/rnnlm/train_backoff_lm.sh
+++ b/egs/ptb/s5/local/rnnlm/train_backoff_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is to train a small, pruned n-gram backoff LM to be used for sampling
# purposes during RNNLM training. We ue pocolm for this because it's good at pruning,
diff --git a/egs/ptb/s5/local/rnnlm/train_backoff_lm2.sh b/egs/ptb/s5/local/rnnlm/train_backoff_lm2.sh
index 98eb1b64e21..28b53fef079 100644
--- a/egs/ptb/s5/local/rnnlm/train_backoff_lm2.sh
+++ b/egs/ptb/s5/local/rnnlm/train_backoff_lm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is to train a small, pruned n-gram backoff LM to be used for sampling
# purposes during RNNLM training. It uses the C++ tool that we wrote for this
diff --git a/egs/ptb/s5/local/rnnlm/tuning/run_tdnn_a.sh b/egs/ptb/s5/local/rnnlm/tuning/run_tdnn_a.sh
index f0cac23231e..94065ace401 100755
--- a/egs/ptb/s5/local/rnnlm/tuning/run_tdnn_a.sh
+++ b/egs/ptb/s5/local/rnnlm/tuning/run_tdnn_a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# To be run from the directory egs/ptb/s5.
# This is to be done after local/prepare_rnnlm_data.sh.
diff --git a/egs/ptb/s5/run.sh b/egs/ptb/s5/run.sh
index d62be6871f3..2dcd8101860 100755
--- a/egs/ptb/s5/run.sh
+++ b/egs/ptb/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/reverb/s5/local/chain/compare_wer.sh b/egs/reverb/s5/local/chain/compare_wer.sh
index cd6be14ed88..736a3177f17 100755
--- a/egs/reverb/s5/local/chain/compare_wer.sh
+++ b/egs/reverb/s5/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh
index c8b4997161e..a6767db3652 100755
--- a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index 4723400c76b..294d3d505e1 100755
--- a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Set -e here so that we catch if any executable fails immediately
set -euo pipefail
diff --git a/egs/reverb/s5/local/compute_se_scores.sh b/egs/reverb/s5/local/compute_se_scores.sh
index 8168c2c46a2..4c6b8c2da0e 100755
--- a/egs/reverb/s5/local/compute_se_scores.sh
+++ b/egs/reverb/s5/local/compute_se_scores.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/reverb/s5/local/download_se_eval_tool.sh b/egs/reverb/s5/local/download_se_eval_tool.sh
index 0d7bb8305ea..aa29766b927 100755
--- a/egs/reverb/s5/local/download_se_eval_tool.sh
+++ b/egs/reverb/s5/local/download_se_eval_tool.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# This script downloads the official REVERB challenge SE scripts and SRMR toolbox
# This script also downloads and compiles PESQ
diff --git a/egs/reverb/s5/local/generate_data.sh b/egs/reverb/s5/local/generate_data.sh
index 3228f0e1b3c..91c9c96a260 100755
--- a/egs/reverb/s5/local/generate_data.sh
+++ b/egs/reverb/s5/local/generate_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe)
# Apache 2.0
diff --git a/egs/reverb/s5/local/get_results.sh b/egs/reverb/s5/local/get_results.sh
index 8867961dcdd..5945109708a 100755
--- a/egs/reverb/s5/local/get_results.sh
+++ b/egs/reverb/s5/local/get_results.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# "Our baselines"
echo "########################################"
diff --git a/egs/reverb/s5/local/nnet3/compare_wer.sh b/egs/reverb/s5/local/nnet3/compare_wer.sh
index 095e85cc338..4888de1f159 100755
--- a/egs/reverb/s5/local/nnet3/compare_wer.sh
+++ b/egs/reverb/s5/local/nnet3/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
diff --git a/egs/reverb/s5/local/nnet3/run_ivector_common.sh b/egs/reverb/s5/local/nnet3/run_ivector_common.sh
index 3af3ad77565..4963ce3cf7c 100755
--- a/egs/reverb/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/reverb/s5/local/nnet3/run_ivector_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -euo pipefail
diff --git a/egs/reverb/s5/local/prepare_real_data.sh b/egs/reverb/s5/local/prepare_real_data.sh
index 2da51b9786b..5cf3ec56b26 100755
--- a/egs/reverb/s5/local/prepare_real_data.sh
+++ b/egs/reverb/s5/local/prepare_real_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe)
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
diff --git a/egs/reverb/s5/local/prepare_simu_data.sh b/egs/reverb/s5/local/prepare_simu_data.sh
index 8757021ddd7..a229ffd5115 100755
--- a/egs/reverb/s5/local/prepare_simu_data.sh
+++ b/egs/reverb/s5/local/prepare_simu_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2018 Johns Hopkins University (Author: Shinji Watanabe)
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
diff --git a/egs/reverb/s5/local/run_beamform.sh b/egs/reverb/s5/local/run_beamform.sh
index 1c8aade7287..ab62b7a6fdc 100755
--- a/egs/reverb/s5/local/run_beamform.sh
+++ b/egs/reverb/s5/local/run_beamform.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe)
# Copyright 2018, Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
diff --git a/egs/reverb/s5/local/run_wpe.sh b/egs/reverb/s5/local/run_wpe.sh
index d1ea56c6c55..5ed4a5b9832 100755
--- a/egs/reverb/s5/local/run_wpe.sh
+++ b/egs/reverb/s5/local/run_wpe.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Johns Hopkins University (Author: Aswin Shanmugam Subramanian)
# Apache 2.0
diff --git a/egs/reverb/s5/local/score.sh b/egs/reverb/s5/local/score.sh
index 66bc976333f..87867679fd6 100755
--- a/egs/reverb/s5/local/score.sh
+++ b/egs/reverb/s5/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
diff --git a/egs/reverb/s5/local/wsj_prepare_beep_dict.sh b/egs/reverb/s5/local/wsj_prepare_beep_dict.sh
index 879ef956844..73e434373c8 100755
--- a/egs/reverb/s5/local/wsj_prepare_beep_dict.sh
+++ b/egs/reverb/s5/local/wsj_prepare_beep_dict.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 MERL (author: Felix Weninger)
# Contains some code by Microsoft Corporation, Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/reverb/s5/local/wsjcam0_data_prep.sh b/egs/reverb/s5/local/wsjcam0_data_prep.sh
index cf87aa355d4..65fe9c7f3af 100755
--- a/egs/reverb/s5/local/wsjcam0_data_prep.sh
+++ b/egs/reverb/s5/local/wsjcam0_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 MERL (author: Felix Weninger)
# Contains some code by Microsoft Corporation, Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/reverb/s5/local/wsjcam0_format_data.sh b/egs/reverb/s5/local/wsjcam0_format_data.sh
index 883cb20ed0e..26b5172f338 100755
--- a/egs/reverb/s5/local/wsjcam0_format_data.sh
+++ b/egs/reverb/s5/local/wsjcam0_format_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013 MERL (author: Felix Weninger)
# Contains some code by Microsoft Corporation, Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/reverb/s5/run.sh b/egs/reverb/s5/run.sh
index 999ec98e637..a7e3dd75167 100755
--- a/egs/reverb/s5/run.sh
+++ b/egs/reverb/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2013-2014 MERL (author: Felix Weninger and Shinji Watanabe)
# Johns Hopkins University (author: Szu-Jui Chen)
diff --git a/egs/rimes/v1/local/chain/compare_wer.sh b/egs/rimes/v1/local/chain/compare_wer.sh
index 4a2cc29481c..ae575b29d4f 100755
--- a/egs/rimes/v1/local/chain/compare_wer.sh
+++ b/egs/rimes/v1/local/chain/compare_wer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}
diff --git a/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
index 33eb9dcb98c..b8be489e7d1 100755
--- a/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
+++ b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# e2eali_1a is a 6 cnn layer 3 tdnn layer model with dropout, l2-regularization, batch-normalization
diff --git a/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh
index 9d28a41316d..da6a0bf5cb2 100755
--- a/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh
+++ b/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# This script does end2end chain training (i.e. from scratch)
diff --git a/egs/rimes/v1/local/extract_features.sh b/egs/rimes/v1/local/extract_features.sh
index ec3bc8a268c..e4adf8bf85d 100755
--- a/egs/rimes/v1/local/extract_features.sh
+++ b/egs/rimes/v1/local/extract_features.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Yiwen Shao
# 2018 Ashish Arora
diff --git a/egs/rimes/v1/local/prepare_data.sh b/egs/rimes/v1/local/prepare_data.sh
index 502718e7777..232ecf7c9ef 100755
--- a/egs/rimes/v1/local/prepare_data.sh
+++ b/egs/rimes/v1/local/prepare_data.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script creates traing and validations splits, downloads text corpus for language modeling,
# prepares the training, validation and test data for rimes dataset
diff --git a/egs/rimes/v1/local/score.sh b/egs/rimes/v1/local/score.sh
index 0cfbda9b556..bb325ab793c 100755
--- a/egs/rimes/v1/local/score.sh
+++ b/egs/rimes/v1/local/score.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
cmd=run.pl
diff --git a/egs/rimes/v1/local/score_paragraph.sh b/egs/rimes/v1/local/score_paragraph.sh
index c6ef4da1d5b..810e9a6ccc7 100755
--- a/egs/rimes/v1/local/score_paragraph.sh
+++ b/egs/rimes/v1/local/score_paragraph.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
min_lmwt=7
max_lmwt=17
diff --git a/egs/rimes/v1/local/train_lm.sh b/egs/rimes/v1/local/train_lm.sh
index 51927b7a97e..29579cbca23 100755
--- a/egs/rimes/v1/local/train_lm.sh
+++ b/egs/rimes/v1/local/train_lm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/rimes/v1/run_end2end.sh b/egs/rimes/v1/run_end2end.sh
index d3e3da2be13..a56d54f9727 100755
--- a/egs/rimes/v1/run_end2end.sh
+++ b/egs/rimes/v1/run_end2end.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2018 Hossein Hadian
# Ashish Arora
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh
index c393a9aa28b..035c607ecc1 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is modified from run_tdnn_5f.sh, to use the old topology, as a baseline
# to test the modified transition-model code (by which we hope to be able to
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh
index 131bcf98de9..f608c13260b 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is a modified version of run_tdnn_5g.sh. It uses
# the new transition model and the python version of training scripts.
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh
index db5944fdbea..32b50cde7f4 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is a modified version of run_tdnn_5n.sh. It uses
# a new configs convention for chain model after kaldi 5.2.
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5p.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5p.sh
index 37073a53eba..67f67fbdfd8 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_5p.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5p.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is a modified version of run_tdnn_5o.sh. It uses online-cmn
# for input features, both for ivector extractor and the chain model.
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh
index f77ebb2a071..c457a75d6fc 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script uses weight transfer as a transfer learning method to transfer
# already trained neural net model on wsj to rm.
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh
index e38fa0b231c..8f15a1ed1d9 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# _1b is as _1a, but different as follows
# 1) It uses wsj phone set phones.txt and new lexicon generated using word pronunciation
# in swj lexincon.txt. rm words, that are not presented in wsj, are added as oov
diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh
index 04bef13fab0..4d1cb76bdc1 100755
--- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh
+++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# _1c is as _1b but it uses source chain-trained DNN model instead of GMM model
# to generate alignments for RM using WSJ model.
diff --git a/egs/rm/s5/local/nnet/run_autoencoder.sh b/egs/rm/s5/local/nnet/run_autoencoder.sh
index d9a309deee0..a0a0be33237 100755
--- a/egs/rm/s5/local/nnet/run_autoencoder.sh
+++ b/egs/rm/s5/local/nnet/run_autoencoder.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_blocksoftmax.sh b/egs/rm/s5/local/nnet/run_blocksoftmax.sh
index 175a6021778..81a5ecabafa 100755
--- a/egs/rm/s5/local/nnet/run_blocksoftmax.sh
+++ b/egs/rm/s5/local/nnet/run_blocksoftmax.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_blstm.sh b/egs/rm/s5/local/nnet/run_blstm.sh
index ce0baecb5c6..b2fd495f851 100755
--- a/egs/rm/s5/local/nnet/run_blstm.sh
+++ b/egs/rm/s5/local/nnet/run_blstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_cnn.sh b/egs/rm/s5/local/nnet/run_cnn.sh
index 8c5730a1c85..b57d6e47111 100755
--- a/egs/rm/s5/local/nnet/run_cnn.sh
+++ b/egs/rm/s5/local/nnet/run_cnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_cnn2d.sh b/egs/rm/s5/local/nnet/run_cnn2d.sh
index be17bce7a57..e493cf44497 100755
--- a/egs/rm/s5/local/nnet/run_cnn2d.sh
+++ b/egs/rm/s5/local/nnet/run_cnn2d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_dnn.sh b/egs/rm/s5/local/nnet/run_dnn.sh
index c2ba26970ad..9059cff43c2 100755
--- a/egs/rm/s5/local/nnet/run_dnn.sh
+++ b/egs/rm/s5/local/nnet/run_dnn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_dnn_fbank.sh b/egs/rm/s5/local/nnet/run_dnn_fbank.sh
index ff6916346c8..bfc0bdd3ed4 100755
--- a/egs/rm/s5/local/nnet/run_dnn_fbank.sh
+++ b/egs/rm/s5/local/nnet/run_dnn_fbank.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_dnn_fbank_relu.sh b/egs/rm/s5/local/nnet/run_dnn_fbank_relu.sh
index 11b1547051d..b3b544ad3f0 100755
--- a/egs/rm/s5/local/nnet/run_dnn_fbank_relu.sh
+++ b/egs/rm/s5/local/nnet/run_dnn_fbank_relu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_dummy_ivec.sh b/egs/rm/s5/local/nnet/run_dummy_ivec.sh
index 956d22d2e54..f92e4460af8 100755
--- a/egs/rm/s5/local/nnet/run_dummy_ivec.sh
+++ b/egs/rm/s5/local/nnet/run_dummy_ivec.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_lstm.sh b/egs/rm/s5/local/nnet/run_lstm.sh
index 48e8592fd7b..6985443b86b 100755
--- a/egs/rm/s5/local/nnet/run_lstm.sh
+++ b/egs/rm/s5/local/nnet/run_lstm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
diff --git a/egs/rm/s5/local/nnet/run_multilingual.sh b/egs/rm/s5/local/nnet/run_multilingual.sh
index 126f616c34d..74c722cceb9 100755
--- a/egs/rm/s5/local/nnet/run_multilingual.sh
+++ b/egs/rm/s5/local/nnet/run_multilingual.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2015 University of Illinois (Author: Amit Das)
# Copyright 2012-2015 Brno University of Technology (Author: Karel Vesely)
diff --git a/egs/rm/s5/local/nnet2/run_4a.sh b/egs/rm/s5/local/nnet2/run_4a.sh
index 42695abdccb..81b3fe08b4d 100755
--- a/egs/rm/s5/local/nnet2/run_4a.sh
+++ b/egs/rm/s5/local/nnet2/run_4a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/nnet2/run_4b.sh b/egs/rm/s5/local/nnet2/run_4b.sh
index 741340412c0..a160799a804 100755
--- a/egs/rm/s5/local/nnet2/run_4b.sh
+++ b/egs/rm/s5/local/nnet2/run_4b.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/rm/s5/local/nnet2/run_4b_gpu.sh b/egs/rm/s5/local/nnet2/run_4b_gpu.sh
index 9cde9f1694e..904a78e91a8 100755
--- a/egs/rm/s5/local/nnet2/run_4b_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_4b_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
stage=0
diff --git a/egs/rm/s5/local/nnet2/run_4c.sh b/egs/rm/s5/local/nnet2/run_4c.sh
index 7a2bd0360d7..28a7bb27e33 100755
--- a/egs/rm/s5/local/nnet2/run_4c.sh
+++ b/egs/rm/s5/local/nnet2/run_4c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is neural net training on top of adapted 40-dimensional features.
diff --git a/egs/rm/s5/local/nnet2/run_4d.sh b/egs/rm/s5/local/nnet2/run_4d.sh
index e7765fb28d5..425f82dc51d 100755
--- a/egs/rm/s5/local/nnet2/run_4d.sh
+++ b/egs/rm/s5/local/nnet2/run_4d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# local/nnet2/run_4d.sh is the new, faster version of the p-norm training script.
diff --git a/egs/rm/s5/local/nnet2/run_4d2.sh b/egs/rm/s5/local/nnet2/run_4d2.sh
index 5f64b3aef2e..e9d60d134a0 100755
--- a/egs/rm/s5/local/nnet2/run_4d2.sh
+++ b/egs/rm/s5/local/nnet2/run_4d2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# 4d2 is as 4d but adding perturbed training with multiplier=1.0
diff --git a/egs/rm/s5/local/nnet2/run_4d3.sh b/egs/rm/s5/local/nnet2/run_4d3.sh
index 3e486acabb4..d204092a7d1 100755
--- a/egs/rm/s5/local/nnet2/run_4d3.sh
+++ b/egs/rm/s5/local/nnet2/run_4d3.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# run_4d3.sh is as run_4d.sh, but using a newer version of the scripts that
diff --git a/egs/rm/s5/local/nnet2/run_4e.sh b/egs/rm/s5/local/nnet2/run_4e.sh
index 425af853bf1..85ba114adda 100755
--- a/egs/rm/s5/local/nnet2/run_4e.sh
+++ b/egs/rm/s5/local/nnet2/run_4e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/nnet2/run_4e_gpu.sh b/egs/rm/s5/local/nnet2/run_4e_gpu.sh
index 9fe72669802..df471cca273 100755
--- a/egs/rm/s5/local/nnet2/run_4e_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_4e_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is GPU based pnorm neural net ensemble training on top of adapted 40-dimensional features.
diff --git a/egs/rm/s5/local/nnet2/run_5c.sh b/egs/rm/s5/local/nnet2/run_5c.sh
index edcf366f0fd..77beac9c4cd 100755
--- a/egs/rm/s5/local/nnet2/run_5c.sh
+++ b/egs/rm/s5/local/nnet2/run_5c.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is neural net training on top of adapted 40-dimensional features.
# This version of the script uses GPUs. We distinguish it by putting "_gpu"
diff --git a/egs/rm/s5/local/nnet2/run_5c_gpu.sh b/egs/rm/s5/local/nnet2/run_5c_gpu.sh
index 219e2cb808e..edabae780f8 100755
--- a/egs/rm/s5/local/nnet2/run_5c_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_5c_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates discriminative training of neural nets.
diff --git a/egs/rm/s5/local/nnet2/run_5d.sh b/egs/rm/s5/local/nnet2/run_5d.sh
index 3617ea0b126..8e3321435ec 100755
--- a/egs/rm/s5/local/nnet2/run_5d.sh
+++ b/egs/rm/s5/local/nnet2/run_5d.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates discriminative training of p-norm neural nets.
diff --git a/egs/rm/s5/local/nnet2/run_5d_gpu.sh b/egs/rm/s5/local/nnet2/run_5d_gpu.sh
index f83cd3db20a..e665ca7e3f1 100755
--- a/egs/rm/s5/local/nnet2/run_5d_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_5d_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates discriminative training of p-norm neural nets.
diff --git a/egs/rm/s5/local/nnet2/run_5e_gpu.sh b/egs/rm/s5/local/nnet2/run_5e_gpu.sh
index 37c9fb4238d..9d0f43f9279 100755
--- a/egs/rm/s5/local/nnet2/run_5e_gpu.sh
+++ b/egs/rm/s5/local/nnet2/run_5e_gpu.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates discriminative training of ensemble-trained p-norm neural nets.
diff --git a/egs/rm/s5/local/online/run_gmm.sh b/egs/rm/s5/local/online/run_gmm.sh
index 90a2a48437d..b9bf04dfd56 100755
--- a/egs/rm/s5/local/online/run_gmm.sh
+++ b/egs/rm/s5/local/online/run_gmm.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_gmm_pitch.sh b/egs/rm/s5/local/online/run_gmm_pitch.sh
index 65388fda3c2..ad87935d281 100755
--- a/egs/rm/s5/local/online/run_gmm_pitch.sh
+++ b/egs/rm/s5/local/online/run_gmm_pitch.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2.sh b/egs/rm/s5/local/online/run_nnet2.sh
index 243be25764e..035ab75678f 100755
--- a/egs/rm/s5/local/online/run_nnet2.sh
+++ b/egs/rm/s5/local/online/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_baseline.sh b/egs/rm/s5/local/online/run_nnet2_baseline.sh
index de977b29f43..cc4708b6a33 100755
--- a/egs/rm/s5/local/online/run_nnet2_baseline.sh
+++ b/egs/rm/s5/local/online/run_nnet2_baseline.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this is a baseline for ./run_nnet2.sh, without
diff --git a/egs/rm/s5/local/online/run_nnet2_common.sh b/egs/rm/s5/local/online/run_nnet2_common.sh
index dfacfcf9c9f..6a721f93e1a 100755
--- a/egs/rm/s5/local/online/run_nnet2_common.sh
+++ b/egs/rm/s5/local/online/run_nnet2_common.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script extracts mfcc features using mfcc_config and trains ubm model and
# ivector extractor and extracts ivector for train and test.
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_multisplice.sh b/egs/rm/s5/local/online/run_nnet2_multisplice.sh
index a05d6856a0e..e8bb0762710 100755
--- a/egs/rm/s5/local/online/run_nnet2_multisplice.sh
+++ b/egs/rm/s5/local/online/run_nnet2_multisplice.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_multisplice_disc.sh b/egs/rm/s5/local/online/run_nnet2_multisplice_disc.sh
index c1d424ab58b..044e32c0891 100755
--- a/egs/rm/s5/local/online/run_nnet2_multisplice_disc.sh
+++ b/egs/rm/s5/local/online/run_nnet2_multisplice_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is to be run after run_nnet2_multisplice.sh.
# It demonstrates discriminative training for the online-nnet2 models
diff --git a/egs/rm/s5/local/online/run_nnet2_perturbed.sh b/egs/rm/s5/local/online/run_nnet2_perturbed.sh
index 5583ad745ea..cede0620b5a 100755
--- a/egs/rm/s5/local/online/run_nnet2_perturbed.sh
+++ b/egs/rm/s5/local/online/run_nnet2_perturbed.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_wsj.sh b/egs/rm/s5/local/online/run_nnet2_wsj.sh
index 5a5b293f790..79bb22866e1 100755
--- a/egs/rm/s5/local/online/run_nnet2_wsj.sh
+++ b/egs/rm/s5/local/online/run_nnet2_wsj.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# note: see the newer, better script run_nnet2_wsj_joint.sh
diff --git a/egs/rm/s5/local/online/run_nnet2_wsj_joint.sh b/egs/rm/s5/local/online/run_nnet2_wsj_joint.sh
index 68a25f49b3a..6e108065a88 100755
--- a/egs/rm/s5/local/online/run_nnet2_wsj_joint.sh
+++ b/egs/rm/s5/local/online/run_nnet2_wsj_joint.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is the latest version of training that combines RM and WSJ, in a setup where
# there are no shared phones (so it's like a multilingual setup).
diff --git a/egs/rm/s5/local/online/run_nnet2_wsj_joint_disc.sh b/egs/rm/s5/local/online/run_nnet2_wsj_joint_disc.sh
index c7d31427e8f..87808e4c4a0 100755
--- a/egs/rm/s5/local/online/run_nnet2_wsj_joint_disc.sh
+++ b/egs/rm/s5/local/online/run_nnet2_wsj_joint_disc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# this script is discriminative training after multi-language training (as
diff --git a/egs/rm/s5/local/prepare_wsj_rm_lang.sh b/egs/rm/s5/local/prepare_wsj_rm_lang.sh
index fd8cb958925..9be949e887b 100755
--- a/egs/rm/s5/local/prepare_wsj_rm_lang.sh
+++ b/egs/rm/s5/local/prepare_wsj_rm_lang.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Pegah Ghahremani
# This script prepares a dictionary for wsj-to-rm transfer learning experiment,
diff --git a/egs/rm/s5/local/rm_data_prep.sh b/egs/rm/s5/local/rm_data_prep.sh
index 9fe759aef4a..4c7fa015f93 100755
--- a/egs/rm/s5/local/rm_data_prep.sh
+++ b/egs/rm/s5/local/rm_data_prep.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
diff --git a/egs/rm/s5/local/rm_prepare_grammar.sh b/egs/rm/s5/local/rm_prepare_grammar.sh
index 20c31b7d208..4255e48eb77 100755
--- a/egs/rm/s5/local/rm_prepare_grammar.sh
+++ b/egs/rm/s5/local/rm_prepare_grammar.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
diff --git a/egs/rm/s5/local/rm_prepare_grammar_ug.sh b/egs/rm/s5/local/rm_prepare_grammar_ug.sh
index 427635caadc..e42efd22aa5 100755
--- a/egs/rm/s5/local/rm_prepare_grammar_ug.sh
+++ b/egs/rm/s5/local/rm_prepare_grammar_ug.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
diff --git a/egs/rm/s5/local/run_dnn_convert_nnet2.sh b/egs/rm/s5/local/run_dnn_convert_nnet2.sh
index 664ecf3f80b..72c3b83bbc2 100755
--- a/egs/rm/s5/local/run_dnn_convert_nnet2.sh
+++ b/egs/rm/s5/local/run_dnn_convert_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This script demonstrates some commands that you could run after run_dnn.sh,
# that relate to conversion to the nnet2 model format.
diff --git a/egs/rm/s5/local/run_nnet2.sh b/egs/rm/s5/local/run_nnet2.sh
index 1d874324856..3605b6dcbac 100755
--- a/egs/rm/s5/local/run_nnet2.sh
+++ b/egs/rm/s5/local/run_nnet2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# You don't have to run all these.
# you can pick and choose. Look at the RESULTS file..
diff --git a/egs/rm/s5/local/run_pitch.sh b/egs/rm/s5/local/run_pitch.sh
index d123fc8901b..cab78b28305 100755
--- a/egs/rm/s5/local/run_pitch.sh
+++ b/egs/rm/s5/local/run_pitch.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is like ../run.sh but with pitch; it's included to demonstrate the
# online-decoding with pitch.
diff --git a/egs/rm/s5/local/run_raw_fmllr.sh b/egs/rm/s5/local/run_raw_fmllr.sh
index e02002aa1d0..2889cf1f5c5 100755
--- a/egs/rm/s5/local/run_raw_fmllr.sh
+++ b/egs/rm/s5/local/run_raw_fmllr.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
diff --git a/egs/rm/s5/local/run_sgmm2.sh b/egs/rm/s5/local/run_sgmm2.sh
index 95a40141892..808a52dc95c 100755
--- a/egs/rm/s5/local/run_sgmm2.sh
+++ b/egs/rm/s5/local/run_sgmm2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is as run_sgmm.sh but using the "sgmm2" code, which uses "state-clustered tied mixtures"
# and the symmetric SGMM, and one or two other small changes (e.g. no updating of M for a few
diff --git a/egs/rm/s5/local/run_sgmm2x.sh b/egs/rm/s5/local/run_sgmm2x.sh
index 00730697693..1c76f57754e 100755
--- a/egs/rm/s5/local/run_sgmm2x.sh
+++ b/egs/rm/s5/local/run_sgmm2x.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This is as run_sgmm2.sh but excluding the "speaker-dependent weights",
# so not doing the symmetric SGMM.
diff --git a/egs/rm/s5/local/run_sgmm_multiling.sh b/egs/rm/s5/local/run_sgmm_multiling.sh
index 42369cd2937..a3e138a64eb 100755
--- a/egs/rm/s5/local/run_sgmm_multiling.sh
+++ b/egs/rm/s5/local/run_sgmm_multiling.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Multilingual setup for SGMMs.
# Caution: this is just a stub, intended to show some others what to do, it
diff --git a/egs/rm/s5/local/run_vtln.sh b/egs/rm/s5/local/run_vtln.sh
index 793829653d2..032884d50a9 100755
--- a/egs/rm/s5/local/run_vtln.sh
+++ b/egs/rm/s5/local/run_vtln.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# This scripts tests the VTLN estimation where the system used to get the
# VTLN warps is based on delta+delta-deltas.
diff --git a/egs/rm/s5/local/run_vtln2.sh b/egs/rm/s5/local/run_vtln2.sh
index b87030d2e3d..5f02aeb1d73 100755
--- a/egs/rm/s5/local/run_vtln2.sh
+++ b/egs/rm/s5/local/run_vtln2.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
featdir=mfcc
diff --git a/egs/rm/s5/local/test_decoders.sh b/egs/rm/s5/local/test_decoders.sh
index 2b1d4172139..d4080351828 100755
--- a/egs/rm/s5/local/test_decoders.sh
+++ b/egs/rm/s5/local/test_decoders.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
dir=exp/tri1/decode/tmp
diff --git a/egs/rm/s5/run.sh b/egs/rm/s5/run.sh
index 61dcaa0e34a..2a8e5add17c 100755
--- a/egs/rm/s5/run.sh
+++ b/egs/rm/s5/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
. ./cmd.sh
set -e # exit on error
diff --git a/egs/sitw/v1/local/make_sitw.sh b/egs/sitw/v1/local/make_sitw.sh
index 7c0bcd0fea1..699b6f7cea2 100755
--- a/egs/sitw/v1/local/make_sitw.sh
+++ b/egs/sitw/v1/local/make_sitw.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyrigh 2017 Ignacio Viñals
# 2017-2018 David Snyder
#
diff --git a/egs/sitw/v1/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/sitw/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
index 480b2cc2fe8..ebf0a2cd21f 100755
--- a/egs/sitw/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
+++ b/egs/sitw/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Copied from egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh (commit 3ea534070fd2cccd2e4ee21772132230033022ce).
#
diff --git a/egs/sitw/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/sitw/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
index 892c1ad55bd..54a226e1fc9 100755
--- a/egs/sitw/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
+++ b/egs/sitw/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 David Snyder
# 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
diff --git a/egs/sitw/v1/run.sh b/egs/sitw/v1/run.sh
index 797451df263..592d9f213a0 100755
--- a/egs/sitw/v1/run.sh
+++ b/egs/sitw/v1/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
# 2017-2018 David Snyder
diff --git a/egs/sitw/v2/run.sh b/egs/sitw/v2/run.sh
index aad58e4a853..7f382dc5fd5 100755
--- a/egs/sitw/v2/run.sh
+++ b/egs/sitw/v2/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright 2017 Johns Hopkins University (Author: Daniel Povey)
# 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2018 Ewald Enzinger
diff --git a/egs/snips/README.txt b/egs/snips/README.txt
new file mode 100644
index 00000000000..6eb871140fa
--- /dev/null
+++ b/egs/snips/README.txt
@@ -0,0 +1,15 @@
+
+ The SNIPS dataset is a ~54-hour wake word corpus covering 3300 speakers.
+ The wake word is "Hey Snips" pronounced with no pause between the two words.
+ It contains a large variety of English accents and recording environments.
+ Negative samples have been recorded in the same conditions as wake-word utterances.
+ To download the dataset you need to follow the instructions on
+ https://github.com/snipsco/keyword-spotting-research-datasets. It is provided
+ by Snips, Paris, France (https://snips.ai)
+
+ The recipe is in v1/
+
+ The E2E LF-MMI recipe does not require any prior alignments for training
+ LF-MMI, making the alignment more flexible during training. It can be optionally
+ followed by a regular LF-MMI training to further improve the performance.
+
diff --git a/egs/snips/v1/cmd.sh b/egs/snips/v1/cmd.sh
new file mode 100644
index 00000000000..fc5d4aa9e1c
--- /dev/null
+++ b/egs/snips/v1/cmd.sh
@@ -0,0 +1,24 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl"
+export decode_cmd="queue.pl --mem 4G"
+# the use of cuda_cmd is deprecated, used only in 'nnet1',
+export cuda_cmd="queue.pl --gpu 1"
+
+if [[ "$(hostname -f)" == "*.fit.vutbr.cz" ]]; then
+ queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+ export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+ export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+ export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
+
diff --git a/egs/snips/v1/conf/mfcc.conf b/egs/snips/v1/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/snips/v1/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/snips/v1/conf/mfcc_hires.conf b/egs/snips/v1/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..d96b86ddfcb
--- /dev/null
+++ b/egs/snips/v1/conf/mfcc_hires.conf
@@ -0,0 +1,9 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/snips/v1/conf/online_cmvn.conf b/egs/snips/v1/conf/online_cmvn.conf
new file mode 100644
index 00000000000..a173510e433
--- /dev/null
+++ b/egs/snips/v1/conf/online_cmvn.conf
@@ -0,0 +1,3 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
+--norm-means=true
+--norm-vars=false
diff --git a/egs/snips/v1/local/add_prefix_to_scp.py b/egs/snips/v1/local/add_prefix_to_scp.py
new file mode 120000
index 00000000000..b6750c78e16
--- /dev/null
+++ b/egs/snips/v1/local/add_prefix_to_scp.py
@@ -0,0 +1 @@
+../../../../scripts/wakeword/add_prefix_to_scp.py
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/build_tree.sh b/egs/snips/v1/local/chain/build_tree.sh
new file mode 120000
index 00000000000..fb4d74cc9ae
--- /dev/null
+++ b/egs/snips/v1/local/chain/build_tree.sh
@@ -0,0 +1 @@
+../../../../mobvoi/v1/local/chain/build_tree.sh
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/run_e2e_tdnn.sh b/egs/snips/v1/local/chain/run_e2e_tdnn.sh
new file mode 120000
index 00000000000..891eec02423
--- /dev/null
+++ b/egs/snips/v1/local/chain/run_e2e_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_e2e_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/run_tdnn.sh b/egs/snips/v1/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/snips/v1/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/run_tdnn_e2eali.sh b/egs/snips/v1/local/chain/run_tdnn_e2eali.sh
new file mode 120000
index 00000000000..38f0bd07e6c
--- /dev/null
+++ b/egs/snips/v1/local/chain/run_tdnn_e2eali.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_e2eali_1a.sh
\ No newline at end of file
diff --git a/egs/snips/v1/local/chain/tuning/run_e2e_tdnn_1a.sh b/egs/snips/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
new file mode 100755
index 00000000000..4085f923d2e
--- /dev/null
+++ b/egs/snips/v1/local/chain/tuning/run_e2e_tdnn_1a.sh
@@ -0,0 +1,239 @@
+#!/usr/bin/env bash
+# Copyright 2018-2020 Daniel Povey
+# 2018-2020 Yiming Wang
+
+set -e
+
+# configs for 'chain'
+stage=0
+train_stage=-10
+affix=1a
+remove_egs=false
+xent_regularize=0.1
+online_cmvn=true
+
+# training options
+srand=0
+num_epochs=3
+num_jobs_initial=2
+num_jobs_final=5
+minibatch_size=150=128,64/300=100,64,32/600=50,32,16/1200=16,8
+common_egs_dir=
+dim=80
+bn_dim=20
+frames_per_iter=3000000
+bs_scale=0.0
+train_set=train_shorter_combined_spe2e
+test_sets="dev eval"
+wake_word="HeySnips"
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+ cat <$lang/topo
+ fi
+fi
+
+if [ $stage -le 1 ]; then
+ echo "$0: Creating an unnormalized phone language model for the denominator graph..."
+ mkdir -p $tree_dir
+ id_sil=`cat data/lang/phones.txt | grep "SIL" | awk '{print $2}'`
+ id_word=`cat data/lang/phones.txt | grep "heysnips" | awk '{print $2}'`
+ id_freetext=`cat data/lang/phones.txt | grep "freetext" | awk '{print $2}'`
+ cat < $tree_dir/phone_lm.txt
+0 1 $id_sil $id_sil
+0 5 $id_sil $id_sil
+1 2 $id_word $id_word
+2 3 $id_sil $id_sil
+1 4 $id_freetext $id_freetext
+4 5 $id_sil $id_sil
+3 2.09
+5 0.0
+EOF
+ fstcompile $tree_dir/phone_lm.txt $tree_dir/phone_lm.fst
+ fstdeterminizestar $tree_dir/phone_lm.fst $tree_dir/phone_lm.fst.tmp
+ mv $tree_dir/phone_lm.fst.tmp $tree_dir/phone_lm.fst
+ steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$train_cmd" \
+ data/${train_set}_hires $lang $tree_dir
+fi
+
+if [ $stage -le 2 ]; then
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
+ learning_rate_factor=$(python3 -c "print(0.5/$xent_regularize)")
+ affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+ prefinal_opts="l2-regularize=0.01"
+ output_opts="l2-regularize=0.002"
+
+ mkdir -p $dir/configs
+ cat <