From 4ff8e61e4c5dfbb5bc5256614512b3ffcb2d4d76 Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Thu, 28 Dec 2017 13:04:16 -0500 Subject: [PATCH 1/3] prepare chime4 1ch, 2ch, and 6ch track recipes --- egs/chime4/{asr1_1ch => asr1}/RESULTS | 0 egs/chime4/{asr1_1ch => asr1}/cmd.sh | 0 egs/chime4/{asr1_1ch => asr1}/conf/fbank.conf | 0 egs/chime4/{asr1_1ch => asr1}/conf/gpu.conf | 0 egs/chime4/{asr1_1ch => asr1}/conf/pitch.conf | 0 .../local/clean_chime4_format_data.sh | 0 .../local/clean_wsj0_data_prep.sh | 0 .../local/cstr_ndx2flist.pl | 0 .../local/find_noisy_transcripts.pl | 0 .../local/find_transcripts.pl | 0 .../{asr1_1ch => asr1}/local/flist2scp.pl | 0 .../{asr1_1ch => asr1}/local/make_stft.sh | 0 .../local/normalize_transcript.pl | 0 .../local/real_enhan_chime4_data_prep.sh | 0 .../local/real_noisy_chime4_data_prep.sh | 0 .../asr1/local/run_beamform_2ch_track.sh | 83 +++++++++++++++ .../asr1/local/run_beamform_6ch_track.sh | 100 ++++++++++++++++++ .../local/simu_enhan_chime4_data_prep.sh | 0 .../local/simu_noisy_chime4_data_prep.sh | 0 egs/chime4/{asr1_1ch => asr1}/path.sh | 0 egs/chime4/{asr1_1ch => asr1}/run.sh | 41 ++++--- egs/chime4/{asr1_1ch => asr1}/steps | 0 egs/chime4/{asr1_1ch => asr1}/utils | 0 23 files changed, 209 insertions(+), 15 deletions(-) rename egs/chime4/{asr1_1ch => asr1}/RESULTS (100%) rename egs/chime4/{asr1_1ch => asr1}/cmd.sh (100%) rename egs/chime4/{asr1_1ch => asr1}/conf/fbank.conf (100%) rename egs/chime4/{asr1_1ch => asr1}/conf/gpu.conf (100%) rename egs/chime4/{asr1_1ch => asr1}/conf/pitch.conf (100%) rename egs/chime4/{asr1_1ch => asr1}/local/clean_chime4_format_data.sh (100%) rename egs/chime4/{asr1_1ch => asr1}/local/clean_wsj0_data_prep.sh (100%) rename egs/chime4/{asr1_1ch => asr1}/local/cstr_ndx2flist.pl (100%) rename egs/chime4/{asr1_1ch => asr1}/local/find_noisy_transcripts.pl (100%) rename egs/chime4/{asr1_1ch => asr1}/local/find_transcripts.pl (100%) rename egs/chime4/{asr1_1ch => asr1}/local/flist2scp.pl (100%) rename 
egs/chime4/{asr1_1ch => asr1}/local/make_stft.sh (100%) rename egs/chime4/{asr1_1ch => asr1}/local/normalize_transcript.pl (100%) rename egs/chime4/{asr1_1ch => asr1}/local/real_enhan_chime4_data_prep.sh (100%) rename egs/chime4/{asr1_1ch => asr1}/local/real_noisy_chime4_data_prep.sh (100%) create mode 100755 egs/chime4/asr1/local/run_beamform_2ch_track.sh create mode 100755 egs/chime4/asr1/local/run_beamform_6ch_track.sh rename egs/chime4/{asr1_1ch => asr1}/local/simu_enhan_chime4_data_prep.sh (100%) rename egs/chime4/{asr1_1ch => asr1}/local/simu_noisy_chime4_data_prep.sh (100%) rename egs/chime4/{asr1_1ch => asr1}/path.sh (100%) rename egs/chime4/{asr1_1ch => asr1}/run.sh (79%) rename egs/chime4/{asr1_1ch => asr1}/steps (100%) rename egs/chime4/{asr1_1ch => asr1}/utils (100%) diff --git a/egs/chime4/asr1_1ch/RESULTS b/egs/chime4/asr1/RESULTS similarity index 100% rename from egs/chime4/asr1_1ch/RESULTS rename to egs/chime4/asr1/RESULTS diff --git a/egs/chime4/asr1_1ch/cmd.sh b/egs/chime4/asr1/cmd.sh similarity index 100% rename from egs/chime4/asr1_1ch/cmd.sh rename to egs/chime4/asr1/cmd.sh diff --git a/egs/chime4/asr1_1ch/conf/fbank.conf b/egs/chime4/asr1/conf/fbank.conf similarity index 100% rename from egs/chime4/asr1_1ch/conf/fbank.conf rename to egs/chime4/asr1/conf/fbank.conf diff --git a/egs/chime4/asr1_1ch/conf/gpu.conf b/egs/chime4/asr1/conf/gpu.conf similarity index 100% rename from egs/chime4/asr1_1ch/conf/gpu.conf rename to egs/chime4/asr1/conf/gpu.conf diff --git a/egs/chime4/asr1_1ch/conf/pitch.conf b/egs/chime4/asr1/conf/pitch.conf similarity index 100% rename from egs/chime4/asr1_1ch/conf/pitch.conf rename to egs/chime4/asr1/conf/pitch.conf diff --git a/egs/chime4/asr1_1ch/local/clean_chime4_format_data.sh b/egs/chime4/asr1/local/clean_chime4_format_data.sh similarity index 100% rename from egs/chime4/asr1_1ch/local/clean_chime4_format_data.sh rename to egs/chime4/asr1/local/clean_chime4_format_data.sh diff --git 
a/egs/chime4/asr1_1ch/local/clean_wsj0_data_prep.sh b/egs/chime4/asr1/local/clean_wsj0_data_prep.sh similarity index 100% rename from egs/chime4/asr1_1ch/local/clean_wsj0_data_prep.sh rename to egs/chime4/asr1/local/clean_wsj0_data_prep.sh diff --git a/egs/chime4/asr1_1ch/local/cstr_ndx2flist.pl b/egs/chime4/asr1/local/cstr_ndx2flist.pl similarity index 100% rename from egs/chime4/asr1_1ch/local/cstr_ndx2flist.pl rename to egs/chime4/asr1/local/cstr_ndx2flist.pl diff --git a/egs/chime4/asr1_1ch/local/find_noisy_transcripts.pl b/egs/chime4/asr1/local/find_noisy_transcripts.pl similarity index 100% rename from egs/chime4/asr1_1ch/local/find_noisy_transcripts.pl rename to egs/chime4/asr1/local/find_noisy_transcripts.pl diff --git a/egs/chime4/asr1_1ch/local/find_transcripts.pl b/egs/chime4/asr1/local/find_transcripts.pl similarity index 100% rename from egs/chime4/asr1_1ch/local/find_transcripts.pl rename to egs/chime4/asr1/local/find_transcripts.pl diff --git a/egs/chime4/asr1_1ch/local/flist2scp.pl b/egs/chime4/asr1/local/flist2scp.pl similarity index 100% rename from egs/chime4/asr1_1ch/local/flist2scp.pl rename to egs/chime4/asr1/local/flist2scp.pl diff --git a/egs/chime4/asr1_1ch/local/make_stft.sh b/egs/chime4/asr1/local/make_stft.sh similarity index 100% rename from egs/chime4/asr1_1ch/local/make_stft.sh rename to egs/chime4/asr1/local/make_stft.sh diff --git a/egs/chime4/asr1_1ch/local/normalize_transcript.pl b/egs/chime4/asr1/local/normalize_transcript.pl similarity index 100% rename from egs/chime4/asr1_1ch/local/normalize_transcript.pl rename to egs/chime4/asr1/local/normalize_transcript.pl diff --git a/egs/chime4/asr1_1ch/local/real_enhan_chime4_data_prep.sh b/egs/chime4/asr1/local/real_enhan_chime4_data_prep.sh similarity index 100% rename from egs/chime4/asr1_1ch/local/real_enhan_chime4_data_prep.sh rename to egs/chime4/asr1/local/real_enhan_chime4_data_prep.sh diff --git a/egs/chime4/asr1_1ch/local/real_noisy_chime4_data_prep.sh 
b/egs/chime4/asr1/local/real_noisy_chime4_data_prep.sh similarity index 100% rename from egs/chime4/asr1_1ch/local/real_noisy_chime4_data_prep.sh rename to egs/chime4/asr1/local/real_noisy_chime4_data_prep.sh diff --git a/egs/chime4/asr1/local/run_beamform_2ch_track.sh b/egs/chime4/asr1/local/run_beamform_2ch_track.sh new file mode 100755 index 00000000000..a974adbcad6 --- /dev/null +++ b/egs/chime4/asr1/local/run_beamform_2ch_track.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe) + +. ./cmd.sh +. ./path.sh + +# Config: +nj=10 +cmd=run.pl + +. utils/parse_options.sh || exit 1; + +if [ $# != 2 ]; then + echo "Wrong #arguments ($#, expected 2)" + echo "Usage: local/run_beamform_2ch_track.sh [options] <wav-in-dir> <wav-out-dir>" + echo "main options (for others, see top of script file)" + echo " --nj <nj> # number of parallel jobs" + echo " --cmd <cmd> # Command to run in parallel with" + exit 1; +fi + +sdir=$1 +odir=$2 + +wdir=data/beamforming_2ch_track + +if [ -z $BEAMFORMIT ] ; then + export BEAMFORMIT=$KALDI_ROOT/tools/extras/BeamformIt +fi +export PATH=${PATH}:$BEAMFORMIT +! hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/kaldi/tools; extras/install_beamformit.sh; cd -;'" && exit 1 + +# Strict mode from here on; the script exits on: +# -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline' (-x tracing is not enabled) +set -e +set -u +set -o pipefail + +mkdir -p $odir +mkdir -p $wdir/log + +allwavs=`find $sdir/ | grep "\.wav" | tr ' ' '\n' | awk -F '/' '{print $(NF-1)"/"$NF}'` + +# wavfiles.list can be used as the name of the output files +output_wavfiles=$wdir/wavfiles.list +echo $allwavs | tr ' ' '\n' | awk -F '.'
'{print $1}' | sort | uniq > $output_wavfiles + +# channel list +input_arrays=$wdir/channels +echo $allwavs | tr ' ' '\n' | sort | awk 'NR%2==1' > $wdir/channels.1st +echo $allwavs | tr ' ' '\n' | sort | awk 'NR%2==0' > $wdir/channels.2nd +paste -d" " $output_wavfiles $wdir/channels.1st $wdir/channels.2nd > $input_arrays + +# split the list for parallel processing +split_wavfiles="" +for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" +done +utils/split_scp.pl $output_wavfiles $split_wavfiles || exit 1; + +echo -e "Beamforming\n" +# making a shell script for each job +for n in `seq $nj`; do +cat << EOF > $wdir/log/beamform.$n.sh +while read line; do + $BEAMFORMIT/BeamformIt -s \$line -c $input_arrays \ + --config_file `pwd`/conf/chime4.cfg \ + --source_dir $sdir \ + --result_dir $odir +done < $output_wavfiles.$n +EOF +done +# making a subdirectory for the output wav files +for x in `awk -F '/' '{print $1}' $output_wavfiles | sort | uniq`; do + mkdir -p $odir/$x +done + +chmod a+x $wdir/log/beamform.*.sh +$cmd JOB=1:$nj $wdir/log/beamform.JOB.log \ + $wdir/log/beamform.JOB.sh + +echo "`basename $0` Done." diff --git a/egs/chime4/asr1/local/run_beamform_6ch_track.sh b/egs/chime4/asr1/local/run_beamform_6ch_track.sh new file mode 100755 index 00000000000..5ce5e423391 --- /dev/null +++ b/egs/chime4/asr1/local/run_beamform_6ch_track.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Shinji Watanabe) + +. ./cmd.sh +. ./path.sh + +# Config: +nj=10 +cmd=run.pl +bmf="1 3 4 5 6" +eval_flag=true # make it true when the evaluation data are released + +.
utils/parse_options.sh || exit 1; + +if [ $# != 2 ]; then + echo "Wrong #arguments ($#, expected 2)" + echo "Usage: local/run_beamform_6ch_track.sh [options] <wav-in-dir> <wav-out-dir>" + echo "main options (for others, see top of script file)" + echo " --nj <nj> # number of parallel jobs" + echo " --cmd <cmd> # Command to run in parallel with" + echo " --bmf \"1 3 4 5 6\" # microphones used for beamforming (2th mic is omitted in default)" + exit 1; +fi + +sdir=$1 +odir=$2 +wdir=data/beamforming_`echo $bmf | tr ' ' '_'` + +if [ -z $BEAMFORMIT ] ; then + export BEAMFORMIT=$KALDI_ROOT/tools/extras/BeamformIt +fi +export PATH=${PATH}:$BEAMFORMIT +! hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/kaldi/tools; extras/install_beamformit.sh; cd -;'" && exit 1 + +# Strict mode from here on; the script exits on: +# -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline' (-x tracing is not enabled) +set -e +set -u +set -o pipefail + +mkdir -p $odir +mkdir -p $wdir/log + +echo "Will use the following channels: $bmf" +# number of channels +numch=`echo $bmf | tr ' ' '\n' | wc -l` +echo "the number of channels: $numch" + +# wavfiles.list can be used as the name of the output files +# we only process dev and eval waves +output_wavfiles=$wdir/wavfiles.list +if $eval_flag; then + find $sdir/{dt,et}*{simu,real}/ | grep CH1.wav \ + | awk -F '/' '{print $(NF-1) "/" $NF}' | sed -e "s/\.CH1\.wav//" | sort > $output_wavfiles +else + find $sdir/dt*{simu,real}/ | grep CH1.wav \ + | awk -F '/' '{print $(NF-1) "/" $NF}' | sed -e "s/\.CH1\.wav//" | sort > $output_wavfiles +fi + +# this is an input file list of the microphones +# format: output_name 1st_wav 2nd_wav ...
nth_wav +input_arrays=$wdir/channels_$numch +for x in `cat $output_wavfiles`; do + echo -n "$x" + for ch in $bmf; do + echo -n " $x.CH$ch.wav" + done + echo "" +done > $input_arrays + +# split the list for parallel processing +split_wavfiles="" +for n in `seq $nj`; do + split_wavfiles="$split_wavfiles $output_wavfiles.$n" +done +utils/split_scp.pl $output_wavfiles $split_wavfiles || exit 1; + +echo -e "Beamforming\n" +# making a shell script for each job +for n in `seq $nj`; do +cat << EOF > $wdir/log/beamform.$n.sh +while read line; do + $BEAMFORMIT/BeamformIt -s \$line -c $input_arrays \ + --config_file `pwd`/conf/chime4.cfg \ + --source_dir $sdir \ + --result_dir $odir +done < $output_wavfiles.$n +EOF +done +# making a subdirectory for the output wav files +for x in `awk -F '/' '{print $1}' $output_wavfiles | sort | uniq`; do + mkdir -p $odir/$x +done + +chmod a+x $wdir/log/beamform.*.sh +$cmd JOB=1:$nj $wdir/log/beamform.JOB.log \ + $wdir/log/beamform.JOB.sh + +echo "`basename $0` Done."
diff --git a/egs/chime4/asr1_1ch/local/simu_enhan_chime4_data_prep.sh b/egs/chime4/asr1/local/simu_enhan_chime4_data_prep.sh similarity index 100% rename from egs/chime4/asr1_1ch/local/simu_enhan_chime4_data_prep.sh rename to egs/chime4/asr1/local/simu_enhan_chime4_data_prep.sh diff --git a/egs/chime4/asr1_1ch/local/simu_noisy_chime4_data_prep.sh b/egs/chime4/asr1/local/simu_noisy_chime4_data_prep.sh similarity index 100% rename from egs/chime4/asr1_1ch/local/simu_noisy_chime4_data_prep.sh rename to egs/chime4/asr1/local/simu_noisy_chime4_data_prep.sh diff --git a/egs/chime4/asr1_1ch/path.sh b/egs/chime4/asr1/path.sh similarity index 100% rename from egs/chime4/asr1_1ch/path.sh rename to egs/chime4/asr1/path.sh diff --git a/egs/chime4/asr1_1ch/run.sh b/egs/chime4/asr1/run.sh similarity index 79% rename from egs/chime4/asr1_1ch/run.sh rename to egs/chime4/asr1/run.sh index b9e8b6ad6e9..ab7b0437f13 100755 --- a/egs/chime4/asr1_1ch/run.sh +++ b/egs/chime4/asr1/run.sh @@ -54,8 +54,6 @@ recog_model=acc.best # set a model to be used for decoding: 'acc.best' or 'loss. # data chime4_data=/export/corpora4/CHiME4/CHiME3 # JHU setup -enhan=isolated_1ch_track -enhan_data=$chime4_data/data/audio/16kHz/$enhan # exp tag tag="" # tag for managing experiments. @@ -72,21 +70,35 @@ set -u set -o pipefail train_set=tr05_multi_noisy -train_dev=dt05_multi_${enhan} -recog_set="dt05_multi_${enhan} et05_multi_${enhan}" +train_dev=dt05_multi_isolated_1ch_track +recog_set="\ +dt05_real_isolated_1ch_track dt05_simu_isolated_1ch_track et05_real_isolated_1ch_track et05_simu_isolated_1ch_track \ +dt05_real_beamformit_2mics dt05_simu_beamformit_2mics et05_real_beamformit_2mics et05_simu_beamformit_2mics \ +dt05_real_beamformit_5mics dt05_simu_beamformit_5mics et05_real_beamformit_5mics et05_simu_beamformit_5mics \ +" if [ ${stage} -le 0 ]; then - ### Task dependent. You have to make data the following preparation part by yourself. + ### Task dependent. 
You have to make the following data preparation part by yourself. ### But you can utilize Kaldi recipes in most cases - echo "stage 0: Data preparation" + echo "stage 0: Data preparation" wsj0_data=${chime4_data}/data/WSJ0 local/clean_wsj0_data_prep.sh ${wsj0_data} local/clean_chime4_format_data.sh + echo "beamforming for multichannel cases" + local/run_beamform_2ch_track.sh --cmd "${train_cmd}" --nj 20 ${chime4_data}/data/audio/16kHz/isolated_2ch_track enhan/beamformit_2mics + local/run_beamform_6ch_track.sh --cmd "${train_cmd}" --nj 20 ${chime4_data}/data/audio/16kHz/isolated_6ch_track enhan/beamformit_5mics echo "prepartion for chime4 data" local/real_noisy_chime4_data_prep.sh ${chime4_data} local/simu_noisy_chime4_data_prep.sh ${chime4_data} - local/real_enhan_chime4_data_prep.sh ${enhan} ${enhan_data} - local/simu_enhan_chime4_data_prep.sh ${enhan} ${enhan_data} + echo "test data for 1ch track" + local/real_enhan_chime4_data_prep.sh isolated_1ch_track ${chime4_data}/data/audio/16kHz/isolated_1ch_track + local/simu_enhan_chime4_data_prep.sh isolated_1ch_track ${chime4_data}/data/audio/16kHz/isolated_1ch_track + echo "test data for 2ch track" + local/real_enhan_chime4_data_prep.sh beamformit_2mics enhan/beamformit_2mics + local/simu_enhan_chime4_data_prep.sh beamformit_2mics enhan/beamformit_2mics + echo "test data for 6ch track" + local/real_enhan_chime4_data_prep.sh beamformit_5mics enhan/beamformit_5mics + local/simu_enhan_chime4_data_prep.sh beamformit_5mics enhan/beamformit_5mics fi feat_tr_dir=${dumpdir}/${train_set}/delta${do_delta}; mkdir -p ${feat_tr_dir} @@ -94,10 +106,10 @@ feat_dt_dir=${dumpdir}/${train_dev}/delta${do_delta}; mkdir -p ${feat_dt_dir} if [ ${stage} -le 1 ]; then ### Task dependent. You have to design training and dev sets by yourself. 
### But you can utilize Kaldi recipes in most cases - echo "stage 1: Feature Generation" + echo "stage 1: Feature Generation" # Generate the fbank features; by default 80-dimensional fbanks with pitch on each frame fbankdir=fbank - tasks="tr05_real_noisy tr05_simu_noisy dt05_real_${enhan} dt05_simu_${enhan} et05_real_${enhan} et05_simu_${enhan}" + tasks="tr05_real_noisy tr05_simu_noisy ${recog_set}" for x in ${tasks}; do utils/copy_data_dir.sh data/${x} data-fbank/${x} utils/copy_data_dir.sh data/${x} data-stft/${x} @@ -105,9 +117,8 @@ if [ ${stage} -le 1 ]; then done echo "combine real and simulation data" - utils/combine_data.sh data-fbank/tr05_multi_noisy data-fbank/tr05_simu_noisy data-fbank/tr05_real_noisy - utils/combine_data.sh data-fbank/dt05_multi_${enhan} data-fbank/dt05_simu_${enhan} data-fbank/dt05_real_${enhan} - utils/combine_data.sh data-fbank/et05_multi_${enhan} data-fbank/et05_simu_${enhan} data-fbank/et05_real_${enhan} + utils/combine_data.sh data-fbank/${train_set} data-fbank/tr05_simu_noisy data-fbank/tr05_real_noisy + utils/combine_data.sh data-fbank/${train_dev} data-fbank/dt05_simu_isolated_1ch_track data-fbank/dt05_real_isolated_1ch_track # compute global CMVN compute-cmvn-stats scp:data-fbank/${train_set}/feats.scp data-fbank/${train_set}/cmvn.ark @@ -115,12 +126,12 @@ if [ ${stage} -le 1 ]; then # dump features for training if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_tr_dir}/storage ]; then utils/create_split_dir.pl \ - /export/b{10,11,12,13}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_set}/delta${do_delta}/storage \ + /export/b{14,15,16,17}/${USER}/espnet-data/egs/chime4/asr1/dump/${train_set}/delta${do_delta}/storage \ ${feat_tr_dir}/storage fi if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d ${feat_dt_dir}/storage ]; then utils/create_split_dir.pl \ - /export/b{10,11,12,13}/${USER}/espnet-data/egs/voxforge/asr1/dump/${train_dev}/delta${do_delta}/storage \ + /export/b{14,15,16,17}/${USER}/espnet-data/egs/chime4/asr1/dump/${train_dev}/delta${do_delta}/storage \ ${feat_dt_dir}/storage fi dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \ diff --git a/egs/chime4/asr1_1ch/steps b/egs/chime4/asr1/steps similarity index 100% rename from egs/chime4/asr1_1ch/steps rename to egs/chime4/asr1/steps diff --git a/egs/chime4/asr1_1ch/utils b/egs/chime4/asr1/utils similarity index 100% rename from egs/chime4/asr1_1ch/utils rename to egs/chime4/asr1/utils From f8a5ad42be214bed266fbf1aa83b9bc32216c081 Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Thu, 28 Dec 2017 14:42:16 -0500 Subject: [PATCH 2/3] fix bugs --- egs/chime4/asr1/conf/chime4.cfg | 50 +++++++++++++++++++ egs/chime4/asr1/local/clean_wsj0_data_prep.sh | 3 +- .../asr1/local/run_beamform_2ch_track.sh | 2 +- .../asr1/local/run_beamform_6ch_track.sh | 2 +- egs/chime4/asr1/run.sh | 6 ++- 5 files changed, 57 insertions(+), 6 deletions(-) create mode 100755 egs/chime4/asr1/conf/chime4.cfg diff --git a/egs/chime4/asr1/conf/chime4.cfg b/egs/chime4/asr1/conf/chime4.cfg new file mode 100755 index 00000000000..70fdd858651 --- /dev/null +++ b/egs/chime4/asr1/conf/chime4.cfg @@ -0,0 +1,50 @@ +#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/) + +# scrolling size to compute the delays +scroll_size = 250 + +# cross correlation computation window size +window_size = 500 + +#amount of maximum points for the xcorrelation taken into account +nbest_amount = 4 + +#flag whether to apply an automatic noise thresholding +do_noise_threshold = 1 + +#Percentage of frames with lower xcorr taken as noisy +noise_percent = 10 + +######## acoustic modelling parameters + +#transition probabilities weight for multichannel decoding +trans_weight_multi = 25 +trans_weight_nbest = 25 +
+### + +#flag whether to print the features after setting them, or not +print_features = 1 + +#flag whether to use the bad frames in the sum process +do_avoid_bad_frames = 1 + +#flag to use the best channel (SNR) as a reference +#defined from command line +do_compute_reference = 1 + +#flag whether to use a uem file or not (process the whole file) +do_use_uem_file = 0 + +#flag whether to use an adaptive weights scheme or fixed weights +do_adapt_weights = 1 + +#flag whether to output the sph files or just run the system to create the auxiliary files +do_write_sph_files = 1 + +####directories where to store/retrieve info#### +#channels_file = ./cfg-files/channels + +#show needs to be passed as argument normally, here a default one is given just in case +#show_id = Ttmp + diff --git a/egs/chime4/asr1/local/clean_wsj0_data_prep.sh b/egs/chime4/asr1/local/clean_wsj0_data_prep.sh index d1d15568de1..170b77612d4 100755 --- a/egs/chime4/asr1/local/clean_wsj0_data_prep.sh +++ b/egs/chime4/asr1/local/clean_wsj0_data_prep.sh @@ -25,8 +25,7 @@ dir=`pwd`/data/local/data local=`pwd`/local utils=`pwd`/utils -. ./path.sh # Needed for KALDI_ROOT -sph2pipe=${KALDI_ROOT}/tools/sph2pipe_v2.5/sph2pipe +sph2pipe=../../../tools/kaldi/tools/sph2pipe_v2.5/sph2pipe if [ ! -x ${sph2pipe} ]; then echo "Could not find (or execute) the sph2pipe program at ${sph2pipe}"; exit 1; diff --git a/egs/chime4/asr1/local/run_beamform_2ch_track.sh b/egs/chime4/asr1/local/run_beamform_2ch_track.sh index a974adbcad6..2d03178a6e0 100755 --- a/egs/chime4/asr1/local/run_beamform_2ch_track.sh +++ b/egs/chime4/asr1/local/run_beamform_2ch_track.sh @@ -26,7 +26,7 @@ odir=$2 wdir=data/beamforming_2ch_track if [ -z $BEAMFORMIT ] ; then - export BEAMFORMIT=$KALDI_ROOT/tools/extras/BeamformIt + export BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt fi export PATH=${PATH}:$BEAMFORMIT !
hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/kaldi/tools; extras/install_beamformit.sh; cd -;'" && exit 1 diff --git a/egs/chime4/asr1/local/run_beamform_6ch_track.sh b/egs/chime4/asr1/local/run_beamform_6ch_track.sh index 5ce5e423391..6a6f398ca6b 100755 --- a/egs/chime4/asr1/local/run_beamform_6ch_track.sh +++ b/egs/chime4/asr1/local/run_beamform_6ch_track.sh @@ -28,7 +28,7 @@ odir=$2 wdir=data/beamforming_`echo $bmf | tr ' ' '_'` if [ -z $BEAMFORMIT ] ; then - export BEAMFORMIT=$KALDI_ROOT/tools/extras/BeamformIt + export BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt fi export PATH=${PATH}:$BEAMFORMIT ! hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/kaldi/tools; extras/install_beamformit.sh; cd -;'" && exit 1 diff --git a/egs/chime4/asr1/run.sh b/egs/chime4/asr1/run.sh index ab7b0437f13..bad4a461d3a 100755 --- a/egs/chime4/asr1/run.sh +++ b/egs/chime4/asr1/run.sh @@ -85,8 +85,10 @@ if [ ${stage} -le 0 ]; then local/clean_wsj0_data_prep.sh ${wsj0_data} local/clean_chime4_format_data.sh echo "beamforming for multichannel cases" - local/run_beamform_2ch_track.sh --cmd "${train_cmd}" --nj 20 ${chime4_data}/data/audio/16kHz/isolated_2ch_track enhan/beamformit_2mics - local/run_beamform_6ch_track.sh --cmd "${train_cmd}" --nj 20 ${chime4_data}/data/audio/16kHz/isolated_6ch_track enhan/beamformit_5mics + local/run_beamform_2ch_track.sh --cmd "${train_cmd}" --nj 20 \ + ${chime4_data}/data/audio/16kHz/isolated_2ch_track enhan/beamformit_2mics + local/run_beamform_6ch_track.sh --cmd "${train_cmd}" --nj 20 \ + ${chime4_data}/data/audio/16kHz/isolated_6ch_track enhan/beamformit_5mics echo "prepartion for chime4 data" local/real_noisy_chime4_data_prep.sh ${chime4_data} local/simu_noisy_chime4_data_prep.sh ${chime4_data} From 186ab03272368c39a4035d229c02073f1f1eba0d Mon Sep 17 00:00:00 2001 From: Shinji Watanabe Date: Thu, 28 Dec 2017 18:00:37 -0500 Subject: [PATCH 3/3] fix chime4 recipe --- 
egs/chime4/asr1/local/real_enhan_chime4_data_prep.sh | 8 -------- egs/chime4/asr1/local/real_noisy_chime4_data_prep.sh | 2 -- egs/chime4/asr1/local/simu_enhan_chime4_data_prep.sh | 8 -------- egs/chime4/asr1/local/simu_noisy_chime4_data_prep.sh | 2 -- egs/chime4/asr1/run.sh | 8 ++++---- 5 files changed, 4 insertions(+), 24 deletions(-) diff --git a/egs/chime4/asr1/local/real_enhan_chime4_data_prep.sh b/egs/chime4/asr1/local/real_enhan_chime4_data_prep.sh index 7d4f9c892a8..cb25370f1a5 100755 --- a/egs/chime4/asr1/local/real_enhan_chime4_data_prep.sh +++ b/egs/chime4/asr1/local/real_enhan_chime4_data_prep.sh @@ -33,14 +33,6 @@ local=`pwd`/local utils=`pwd`/utils odir=`pwd`/data -. ./path.sh # Needed for KALDI_ROOT -export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin -sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -if [ ! -x $sph2pipe ]; then - echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; - exit 1; -fi - if $eval_flag; then list_set="tr05_real_$enhan dt05_real_$enhan et05_real_$enhan" else diff --git a/egs/chime4/asr1/local/real_noisy_chime4_data_prep.sh b/egs/chime4/asr1/local/real_noisy_chime4_data_prep.sh index 6efe151949c..98a8a22a556 100755 --- a/egs/chime4/asr1/local/real_noisy_chime4_data_prep.sh +++ b/egs/chime4/asr1/local/real_noisy_chime4_data_prep.sh @@ -37,8 +37,6 @@ dir=`pwd`/data/local/data local=`pwd`/local utils=`pwd`/utils -. ./path.sh # Needed for KALDI_ROOT - if ${eval_flag}; then list_set="tr05_real_noisy dt05_real_noisy et05_real_noisy" else diff --git a/egs/chime4/asr1/local/simu_enhan_chime4_data_prep.sh b/egs/chime4/asr1/local/simu_enhan_chime4_data_prep.sh index d6419fa90b9..027dd6630e2 100755 --- a/egs/chime4/asr1/local/simu_enhan_chime4_data_prep.sh +++ b/egs/chime4/asr1/local/simu_enhan_chime4_data_prep.sh @@ -33,14 +33,6 @@ local=`pwd`/local utils=`pwd`/utils odir=`pwd`/data -. ./path.sh # Needed for KALDI_ROOT -export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin -sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -if [ ! 
-x $sph2pipe ]; then - echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; - exit 1; -fi - if $eval_flag; then list_set="tr05_simu_$enhan dt05_simu_$enhan et05_simu_$enhan" else diff --git a/egs/chime4/asr1/local/simu_noisy_chime4_data_prep.sh b/egs/chime4/asr1/local/simu_noisy_chime4_data_prep.sh index 28c754fe3b6..01db3ffab8b 100755 --- a/egs/chime4/asr1/local/simu_noisy_chime4_data_prep.sh +++ b/egs/chime4/asr1/local/simu_noisy_chime4_data_prep.sh @@ -37,8 +37,6 @@ dir=`pwd`/data/local/data local=`pwd`/local utils=`pwd`/utils -. ./path.sh # Needed for KALDI_ROOT - if ${eval_flag}; then list_set="tr05_simu_noisy dt05_simu_noisy et05_simu_noisy" else diff --git a/egs/chime4/asr1/run.sh b/egs/chime4/asr1/run.sh index bad4a461d3a..7cd0ee5e7a5 100755 --- a/egs/chime4/asr1/run.sh +++ b/egs/chime4/asr1/run.sh @@ -96,11 +96,11 @@ if [ ${stage} -le 0 ]; then local/real_enhan_chime4_data_prep.sh isolated_1ch_track ${chime4_data}/data/audio/16kHz/isolated_1ch_track local/simu_enhan_chime4_data_prep.sh isolated_1ch_track ${chime4_data}/data/audio/16kHz/isolated_1ch_track echo "test data for 2ch track" - local/real_enhan_chime4_data_prep.sh beamformit_2mics enhan/beamformit_2mics - local/simu_enhan_chime4_data_prep.sh beamformit_2mics enhan/beamformit_2mics + local/real_enhan_chime4_data_prep.sh beamformit_2mics ${PWD}/enhan/beamformit_2mics + local/simu_enhan_chime4_data_prep.sh beamformit_2mics ${PWD}/enhan/beamformit_2mics echo "test data for 6ch track" - local/real_enhan_chime4_data_prep.sh beamformit_5mics enhan/beamformit_5mics - local/simu_enhan_chime4_data_prep.sh beamformit_5mics enhan/beamformit_5mics + local/real_enhan_chime4_data_prep.sh beamformit_5mics ${PWD}/enhan/beamformit_5mics + local/simu_enhan_chime4_data_prep.sh beamformit_5mics ${PWD}/enhan/beamformit_5mics fi feat_tr_dir=${dumpdir}/${train_set}/delta${do_delta}; mkdir -p ${feat_tr_dir}