From 356fa8258d848e3b092c5b894f6a7b4cab63b29c Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Sun, 17 Apr 2016 12:27:15 -0400
Subject: [PATCH 01/14] A new steps/data/reverberate_data_dir.py script

---
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 299 ++++++++++++++++++
 1 file changed, 299 insertions(+)
 create mode 100755 egs/wsj/s5/steps/data/reverberate_data_dir.py

diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
new file mode 100755
index 00000000000..db93df16db3
--- /dev/null
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+# Copyright 2016  Tom Ko
+# Apache 2.0
+# script to generate reverberated data
+
+# we're using python 3.x style print but want it to work in python 2.x,
+from __future__ import print_function
+import argparse, glob, math, os, random, sys, warnings, copy, imp, ast
+
+train_lib = imp.load_source('ntl', 'steps/nnet3/nnet3_train_lib.py')
+
+class list_cyclic_iterator:
+  def __init__(self, list, random_seed = 0):
+    self.list_index = 0
+    self.list = list
+    random.seed(random_seed)
+    random.shuffle(self.list)
+
+  def next(self):
+    item = self.list[self.list_index]
+    self.list_index = (self.list_index + 1) % len(self.list)
+    return item
+
+
+def GetArgs():
+    # we add compulsary arguments as named arguments for readability
+    parser = argparse.ArgumentParser(description="Generate corrupted data"
+                                                 "for neural network training",
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("--rir-list-file", type=str, required = True, 
+                        help="RIR information file")
+    parser.add_argument("--noise-list-file", type=str, default = None,
+                        help="Noise information file")
+    parser.add_argument("--num-replications", type=int, dest = "num_replica", default = 1,
+                        help="Number of replicate to generated for the data")
+    parser.add_argument('--snrs', type=str, dest = "snr_string", default = '20:10:0', help='snrs to be used for corruption')
+    parser.add_argument('--prefix', type=str, default = None, help='prefix for the id of the corrupted utterances')
+    parser.add_argument("--speech-rvb-probability", type=float, default = 0.8,
+                        help="Probability of reverberating the speech signal, e.g. 0 <= p <= 1")
+    parser.add_argument("--noise-adding-probability", type=float, default = 0.4,
+                        help="Probability of adding point-source noises, e.g. 0 <= p <= 1")
+    parser.add_argument("--max-noises-added", type=int, default = 2,
+                        help="Maximum number of point-source noises could be added")
+    parser.add_argument("input_dir",
+                        help="Input data directory")
+    parser.add_argument("output_dir",
+                        help="Output data directory")
+
+    print(' '.join(sys.argv))
+
+    args = parser.parse_args()
+    args = CheckArgs(args)
+
+    return args
+
+def CheckArgs(args):
+    if not os.path.exists(args.output_dir):
+        os.makedirs(args.output_dir)
+
+    ## Check arguments.
+    if args.rir_list_file is None:
+        raise Exception("Rir information file must be provided")
+    
+    if not os.path.isfile(args.rir_list_file):
+        raise Exception(args.rir_list_file + "not found")
+    
+    if args.noise_list_file is not None:
+        if not os.path.isfile(args.noise_list_file):
+            raise Exception(args.noise_list_file + "not found")
+
+    if args.num_replica > 1 and args.prefix is None:
+        args.prefix = "rvb"
+        warnings.warn("--prefix is set to 'rvb' as --num-replications is larger than 1.")
+
+    return args
+
+def ParseFileToDict(file, assert2fields = False, value_processor = None):
+    if value_processor is None:
+        value_processor = lambda x: x[0]
+
+    dict = {}
+    for line in open(file, 'r'):
+        parts = line.split()
+        if assert2fields:
+            assert(len(parts) == 2)
+
+        dict[parts[0]] = value_processor(parts[1:])
+    return dict
+
+
+# This is the major function to generate pipeline command for the corruption
+# The rir list would have the following format:
+# --rir-id <string,compulsary> --room-id <string,compulsary> --receiver-position-id <string,optional> --source-position-id <string,optional> --rt-60 < <float,optional> --drr <float, optional> < location(support Kaldi IO strings) >
+# The noise list would have the following format:
+# --noise-id <string,compulsary> --noise-type <choices = (isotropic, point source),compulsary> --bg-fg-type <choices=(background|foreground), default=background> --rir-file <str, compulsary if isotropic, should not be specified if point-source> < location=(support Kaldi IO strings) >
+def CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
+    rooms = list_cyclic_iterator(room_list, random_seed = 1)
+    noises = None
+    if len(noise_list) > 0:
+        noises = list_cyclic_iterator(noise_list, random_seed = 1)
+    snrs = list_cyclic_iterator(snr_string.split(':'))
+    command_list = []
+    for i in range(num_replica):
+        keys = wav_scp.keys()
+        keys.sort()
+        for wav_id in keys:
+            wav_pipe = wav_scp[wav_id]
+            wav_dur = durations[wav_id]
+            if prefix is not None:
+                wav_id = prefix + str(i) + "_" + wav_id
+            command = "{0} {1} wav-reverberate".format(wav_id, wav_pipe)
+
+            # pick the room
+            room = rooms.next()
+            command_opts = ""
+            noises_added = []
+            snrs_added = []
+            start_times_added = []
+            if random.random() < speech_rvb_probability:
+                # pick the RIR to reverberate the speech
+                speech_rir = room['rir_list'][random.randint(0,len(room['rir_list'])-1)]
+                command_opts += "--impulse-response={0} ".format(speech_rir.rir_file_location)
+                # add the corresponding isotropic noise if there is any
+                if len(speech_rir.iso_noise_list) > 0:
+                    isotropic_noise = speech_rir.iso_noise_list[random.randint(0,len(speech_rir.iso_noise_list)-1)]
+                    noises_added.append("{0}".format(isotropic_noise.noise_file_location))
+                    snrs_added.append("{0}".format(snrs.next()))
+                    start_times_added.append(round(random.random() * wav_dur, 2))
+
+            if noises is not None and random.random() < noise_adding_probability:
+                for k in range(random.randint(1, max_noises_added)):
+                    # pick the RIR to reverberate the point-source noise
+                    noise = noises.next()
+                    noise_rir = room['rir_list'][random.randint(0,len(room['rir_list'])-1)]
+                    start_times_added.append(round(random.random() * wav_dur, 2))
+                    noises_added.append("\"wav-reverberate --duration={2} --impulse-response={1} {0} - |\" ".format(noise.noise_file_location, noise_rir.rir_file_location, round(random.random()*(wav_dur-start_times_added[-1]), 2)))
+                    snrs_added.append("{0}".format(snrs.next()))
+
+            if len(noises_added) > 1:
+                command_opts += "--additive-signals='{0}' ".format(','.join(noises_added))
+            if len(snrs_added) > 1:
+                command_opts += "--snrs='{0}' ".format(','.join(snrs_added))
+            if len(start_times_added) > 1:
+                command_opts += "--start-times='{0}' ".format(','.join(snrs_added))
+            
+            if command_opts == "":
+                command = "{0} {1}\n".format(wav_id, wav_pipe) 
+            else:
+                command = "{0} {1} wav-reverberate {2} - - |\n".format(wav_id, wav_pipe, command_opts)
+
+            command_list.append(command)
+
+    file_handle = open(output_dir + "/wav.scp", 'w')
+    file_handle.write("".join(command_list))
+    file_handle.close()
+
+
+# This function replicate the entries in files like text
+def ReplicateFileType1(input_file, output_file, num_replica, prefix):
+    list = map(lambda x: x.strip(), open(input_file))
+    f = open(output_file, "w")
+    for i in range(num_replica):
+        for line in list:
+            split1 = line.split()
+            if prefix is not None:
+                split1[0] = prefix + str(i) + "_" + split1[0]
+            print(" ".join(split1), file=f)
+    f.close()
+
+
+# This function replicate the entries in files like segments, utt2spk
+def ReplicateFileType2(input_file, output_file, num_replica, prefix):
+    list = map(lambda x: x.strip(), open(input_file))
+    f = open(output_file, "w")
+    for i in range(num_replica):
+        for line in list:
+            split1 = line.split()
+            if prefix is not None:
+                split1[0] = prefix + str(i) + "_" + split1[0]
+                split1[1] = prefix + str(i) + "_" + split1[1]
+            print(" ".join(split1), file=f)
+    f.close()
+
+
+def MakeCorruption(input_dir, output_dir, room_list, noise_list, snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
+    
+    if not os.path.isfile(input_dir + "/reco2dur"):
+        print("Getting the duration of the recordings...");
+        train_lib.RunKaldiCommand("wav-to-duration --read-entire-file=true scp:{0}/wav.scp ark,t:{0}/reco2dur".format(input_dir))
+    durations = ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0]))
+    wav_scp = ParseFileToDict(input_dir + "/wav.scp", value_processor = lambda x: " ".join(x))
+    CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added)
+
+    ReplicateFileType2(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replica, prefix)
+    train_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
+                    .format(output_dir = output_dir))
+
+    if os.path.isfile(input_dir + "/text"):
+        ReplicateFileType1(input_dir + "/text", output_dir + "/text", num_replica, prefix)
+    if os.path.isfile(input_dir + "/segments"):
+        ReplicateFileType2(input_dir + "/segments", output_dir + "/segments", num_replica, prefix)
+    if os.path.isfile(input_dir + "/reco2file_and_channel"):
+        ReplicateFileType2(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replica, prefix)
+
+    train_lib.RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}"
+                    .format(output_dir = output_dir))
+
+
+def ParseRirList(rir_list_file):
+    rir_parser = argparse.ArgumentParser()
+    rir_parser.add_argument('--rir-id', type=str, required=True, help='rir id')
+    rir_parser.add_argument('--room-id', type=str, required=True, help='room id')
+    rir_parser.add_argument('--receiver-position-id', type=str, default=None, help='receiver position id')
+    rir_parser.add_argument('--source-position-id', type=str, default=None, help='source position id')
+    rir_parser.add_argument('--rt60', type=float, default=None, help='RT60 is the time required for reflections of a direct sound to decay 60 dB.')
+    rir_parser.add_argument('--drr', type=float, default=None, help='Direct-to-reverberant-ratio of the impulse.')
+    rir_parser.add_argument('rir_file_location', type=str, help='rir file location')
+
+    rir_list = []
+    rir_lines = map(lambda x: x.strip(), open(rir_list_file))
+    for line in rir_lines:
+        rir = rir_parser.parse_args(line.split())
+        setattr(rir, "iso_noise_list", [])
+        rir.iso_noise_list = []
+        rir_list.append(rir)
+
+    return rir_list
+
+def MakeRoomList(rir_list):
+    room_list = []
+    for i in range(len(rir_list)):
+        id = -1
+        for j in range(len(room_list)):
+            if room_list[j]['room_id'] == rir_list[i].room_id:
+                id = j
+                break
+        if id == -1:
+            # add new room
+            room_list.append({'room_id': rir_list[i].room_id, 'rir_list': []})
+
+        room_list[id]['rir_list'].append(rir_list[i])
+
+    return room_list
+
+def ParseNoiseList(rir_list, noise_list_file):
+    noise_parser = argparse.ArgumentParser()
+    noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
+    noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
+    noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foregroun noise', choices = ["background", "foreground"])
+    noise_parser.add_argument('--rir-file', type=str, default=None, help='compulsary if isotropic, should not be specified if point-source')
+    noise_parser.add_argument('noise_file_location', type=str, help='noise file location')
+
+    point_noise_list = []
+    noise_lines = map(lambda x: x.strip(), open(noise_list_file))
+    for line in noise_lines:
+        noise = noise_parser.parse_args(line.split())
+        if noise.noise_type == "isotropic":
+            if noise.rir_file is None:
+                raise Exception("--rir-file must be specified is --noise-type is point-source")
+                warnings.warn("No rir file specified for noise id {0}".format(noise.noise_id))
+            else:
+                id = -1
+                for j in range(len(rir_list)):
+                    if noise.rir_file == rir_list[j].rir_file_location:
+                        id = j
+                        print(noise.rir_file)
+                        rir_list[id].iso_noise_list.append(noise)
+                        break;
+                if id == -1:
+                    warnings.warn("Rir file specified for noise id {0} is not found in rir_list".format(noise.noise_id))
+        else:
+            point_noise_list.append(noise)
+
+    return (point_noise_list, rir_list)
+
+def Main():
+    args = GetArgs()
+    rir_list = ParseRirList(args.rir_list_file)
+    noise_list = []
+    if args.noise_list_file is not None:
+        noise_list, rir_list = ParseNoiseList(rir_list, args.noise_list_file)
+        print("Number of point-source noises is {0}".format(len(noise_list)))
+    room_list = MakeRoomList(rir_list)
+
+    MakeCorruption(input_dir = args.input_dir,
+                   output_dir = args.output_dir,
+                   room_list = room_list,
+                   noise_list = noise_list,
+                   snr_string = args.snr_string,
+                   num_replica = args.num_replica,
+                   prefix = args.prefix,
+                   speech_rvb_probability = args.speech_rvb_probability,
+                   noise_adding_probability = args.noise_adding_probability,
+                   max_noises_added = args.max_noises_added)
+
+if __name__ == "__main__":
+    Main()
+

From 8671e5937c76c787d01f0801cfacb892f75a360c Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Fri, 22 Apr 2016 09:59:27 -0400
Subject: [PATCH 02/14] Update function names; split SNRs into background and
 foreground; add user-specified random seed; always handle isotropic noise as
 background noise

---
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 131 +++++++++---------
 1 file changed, 69 insertions(+), 62 deletions(-)

diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index db93df16db3..b06035de2a6 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -10,10 +10,9 @@
 train_lib = imp.load_source('ntl', 'steps/nnet3/nnet3_train_lib.py')
 
 class list_cyclic_iterator:
-  def __init__(self, list, random_seed = 0):
+  def __init__(self, list):
     self.list_index = 0
     self.list = list
-    random.seed(random_seed)
     random.shuffle(self.list)
 
   def next(self):
@@ -24,17 +23,28 @@ def next(self):
 
 def GetArgs():
     # we add compulsary arguments as named arguments for readability
-    parser = argparse.ArgumentParser(description="Generate corrupted data"
-                                                 "for neural network training",
+    parser = argparse.ArgumentParser(description="Reverberate the data directory with an option "
+                                                 "to add isotropic and point source noiseis. "
+                                                 "This script only deals with single channel wave files. "
+                                                 "If multi-channel noise/rir/speech files are provided one "
+                                                 "of the channels will be randomly picked",
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
     parser.add_argument("--rir-list-file", type=str, required = True, 
-                        help="RIR information file")
+                        help="RIR information file, the format of the file is "
+                        "--rir-id <string,compulsary> --room-id <string,compulsary> "
+                        "--receiver-position-id <string,optional> --source-position-id <string,optional> "
+                        "--rt-60 < <float,optional> --drr <float, optional> < location(support Kaldi IO strings) >")
     parser.add_argument("--noise-list-file", type=str, default = None,
-                        help="Noise information file")
+                        help="Noise information file, the format of the file is"
+                        "--noise-id <string,compulsary> --noise-type <choices = (isotropic, point source),compulsary> "
+                        "--bg-fg-type <choices=(background|foreground), default=background> "
+                        "--rir-file <str, compulsary if isotropic, should not be specified if point-source> "
+                        "< location=(support Kaldi IO strings) >")
     parser.add_argument("--num-replications", type=int, dest = "num_replica", default = 1,
                         help="Number of replicate to generated for the data")
-    parser.add_argument('--snrs', type=str, dest = "snr_string", default = '20:10:0', help='snrs to be used for corruption')
+    parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", default = '20:10:0', help='snrs for foreground noises')
+    parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0', help='snrs for background noises')
     parser.add_argument('--prefix', type=str, default = None, help='prefix for the id of the corrupted utterances')
     parser.add_argument("--speech-rvb-probability", type=float, default = 0.8,
                         help="Probability of reverberating the speech signal, e.g. 0 <= p <= 1")
@@ -42,6 +52,7 @@ def GetArgs():
                         help="Probability of adding point-source noises, e.g. 0 <= p <= 1")
     parser.add_argument("--max-noises-added", type=int, default = 2,
                         help="Maximum number of point-source noises could be added")
+    parser.add_argument('--random-seed', type=int, default=0, help='seed to be used in the randomization of impulese and noises')
     parser.add_argument("input_dir",
                         help="Input data directory")
     parser.add_argument("output_dir",
@@ -59,9 +70,6 @@ def CheckArgs(args):
         os.makedirs(args.output_dir)
 
     ## Check arguments.
-    if args.rir_list_file is None:
-        raise Exception("Rir information file must be provided")
-    
     if not os.path.isfile(args.rir_list_file):
         raise Exception(args.rir_list_file + "not found")
     
@@ -90,26 +98,25 @@ def ParseFileToDict(file, assert2fields = False, value_processor = None):
 
 
 # This is the major function to generate pipeline command for the corruption
-# The rir list would have the following format:
-# --rir-id <string,compulsary> --room-id <string,compulsary> --receiver-position-id <string,optional> --source-position-id <string,optional> --rt-60 < <float,optional> --drr <float, optional> < location(support Kaldi IO strings) >
-# The noise list would have the following format:
-# --noise-id <string,compulsary> --noise-type <choices = (isotropic, point source),compulsary> --bg-fg-type <choices=(background|foreground), default=background> --rir-file <str, compulsary if isotropic, should not be specified if point-source> < location=(support Kaldi IO strings) >
-def CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
-    rooms = list_cyclic_iterator(room_list, random_seed = 1)
+def CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, foreground_snr_array, background_snr_array, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
+    rooms = list_cyclic_iterator(room_list)
     noises = None
     if len(noise_list) > 0:
-        noises = list_cyclic_iterator(noise_list, random_seed = 1)
-    snrs = list_cyclic_iterator(snr_string.split(':'))
+        noises = list_cyclic_iterator(noise_list)
+    foreground_snrs = list_cyclic_iterator(foreground_snr_array)
+    background_snrs = list_cyclic_iterator(background_snr_array)
     command_list = []
     for i in range(num_replica):
         keys = wav_scp.keys()
         keys.sort()
         for wav_id in keys:
             wav_pipe = wav_scp[wav_id]
-            wav_dur = durations[wav_id]
+            # check if it is really a pipe
+            if len(wav_pipe.split()) == 1:
+                wav_pipe = "cat {0} |".format(wav_pipe)
+            speech_dur = durations[wav_id]
             if prefix is not None:
                 wav_id = prefix + str(i) + "_" + wav_id
-            command = "{0} {1} wav-reverberate".format(wav_id, wav_pipe)
 
             # pick the room
             room = rooms.next()
@@ -124,25 +131,31 @@ def CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, snr_string
                 # add the corresponding isotropic noise if there is any
                 if len(speech_rir.iso_noise_list) > 0:
                     isotropic_noise = speech_rir.iso_noise_list[random.randint(0,len(speech_rir.iso_noise_list)-1)]
-                    noises_added.append("{0}".format(isotropic_noise.noise_file_location))
-                    snrs_added.append("{0}".format(snrs.next()))
-                    start_times_added.append(round(random.random() * wav_dur, 2))
+                    # extend the isotropic noise to the length of the speech waveform
+                    noises_added.append("\"wav-reverberate --duration={1} {0} - |\" ".format(isotropic_noise.noise_file_location, speech_dur))
+                    snrs_added.append(background_snrs.next())
+                    start_times_added.append(0)
 
             if noises is not None and random.random() < noise_adding_probability:
                 for k in range(random.randint(1, max_noises_added)):
                     # pick the RIR to reverberate the point-source noise
                     noise = noises.next()
                     noise_rir = room['rir_list'][random.randint(0,len(room['rir_list'])-1)]
-                    start_times_added.append(round(random.random() * wav_dur, 2))
-                    noises_added.append("\"wav-reverberate --duration={2} --impulse-response={1} {0} - |\" ".format(noise.noise_file_location, noise_rir.rir_file_location, round(random.random()*(wav_dur-start_times_added[-1]), 2)))
-                    snrs_added.append("{0}".format(snrs.next()))
-
-            if len(noises_added) > 1:
+                    if noise.bg_fg_type == "background": 
+                        start_times_added.append(0)
+                        noises_added.append("\"wav-reverberate --duration={2} --impulse-response={1} {0} - |\" ".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
+                        snrs_added.append(background_snrs.next())
+                    else:
+                        start_times_added.append(round(random.random() * speech_dur, 2))
+                        noises_added.append("\"wav-reverberate --impulse-response={1} {0} - |\" ".format(noise.noise_file_location, noise_rir.rir_file_location))
+                        snrs_added.append(foreground_snrs.next())
+
+            if len(noises_added) > 0:
                 command_opts += "--additive-signals='{0}' ".format(','.join(noises_added))
-            if len(snrs_added) > 1:
-                command_opts += "--snrs='{0}' ".format(','.join(snrs_added))
-            if len(start_times_added) > 1:
-                command_opts += "--start-times='{0}' ".format(','.join(snrs_added))
+            if len(snrs_added) > 0:
+                command_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x),snrs_added)))
+            if len(start_times_added) > 0:
+                command_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x),start_times_added)))
             
             if command_opts == "":
                 command = "{0} {1}\n".format(wav_id, wav_pipe) 
@@ -156,52 +169,45 @@ def CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, snr_string
     file_handle.close()
 
 
-# This function replicate the entries in files like text
-def ReplicateFileType1(input_file, output_file, num_replica, prefix):
+# This function replicate the entries in files like segments, utt2spk, text
+def AddPrefixToFields(input_file, output_file, num_replica, prefix, field = [0]):
     list = map(lambda x: x.strip(), open(input_file))
     f = open(output_file, "w")
     for i in range(num_replica):
         for line in list:
-            split1 = line.split()
-            if prefix is not None:
-                split1[0] = prefix + str(i) + "_" + split1[0]
-            print(" ".join(split1), file=f)
-    f.close()
-
-
-# This function replicate the entries in files like segments, utt2spk
-def ReplicateFileType2(input_file, output_file, num_replica, prefix):
-    list = map(lambda x: x.strip(), open(input_file))
-    f = open(output_file, "w")
-    for i in range(num_replica):
-        for line in list:
-            split1 = line.split()
-            if prefix is not None:
-                split1[0] = prefix + str(i) + "_" + split1[0]
-                split1[1] = prefix + str(i) + "_" + split1[1]
-            print(" ".join(split1), file=f)
+            if len(line) > 0 and line[0] != ';':
+                split1 = line.split()
+                for j in field:
+                    if prefix is not None:
+                        split1[j] = prefix + str(i) + "_" + split1[j]
+                print(" ".join(split1), file=f)
+            else:
+                print(line, file=f)
     f.close()
 
 
-def MakeCorruption(input_dir, output_dir, room_list, noise_list, snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
+def CreateReverberatedCopy(input_dir, output_dir, room_list, noise_list, foreground_snr_string, background_snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
     
     if not os.path.isfile(input_dir + "/reco2dur"):
         print("Getting the duration of the recordings...");
         train_lib.RunKaldiCommand("wav-to-duration --read-entire-file=true scp:{0}/wav.scp ark,t:{0}/reco2dur".format(input_dir))
     durations = ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0]))
     wav_scp = ParseFileToDict(input_dir + "/wav.scp", value_processor = lambda x: " ".join(x))
-    CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added)
+    foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':'))
+    background_snr_array = map(lambda x: float(x), background_snr_string.split(':'))
+
+    CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, foreground_snr_array, background_snr_array, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added)
 
-    ReplicateFileType2(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replica, prefix)
+    AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replica, prefix, field = [0,1])
     train_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
                     .format(output_dir = output_dir))
 
     if os.path.isfile(input_dir + "/text"):
-        ReplicateFileType1(input_dir + "/text", output_dir + "/text", num_replica, prefix)
+        AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replica, prefix, field =[0])
     if os.path.isfile(input_dir + "/segments"):
-        ReplicateFileType2(input_dir + "/segments", output_dir + "/segments", num_replica, prefix)
+        AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replica, prefix, field = [0,1])
     if os.path.isfile(input_dir + "/reco2file_and_channel"):
-        ReplicateFileType2(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replica, prefix)
+        AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replica, prefix, field = [0,1])
 
     train_lib.RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}"
                     .format(output_dir = output_dir))
@@ -247,7 +253,7 @@ def ParseNoiseList(rir_list, noise_list_file):
     noise_parser = argparse.ArgumentParser()
     noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
     noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
-    noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foregroun noise', choices = ["background", "foreground"])
+    noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise', choices = ["background", "foreground"])
     noise_parser.add_argument('--rir-file', type=str, default=None, help='compulsary if isotropic, should not be specified if point-source')
     noise_parser.add_argument('noise_file_location', type=str, help='noise file location')
 
@@ -264,7 +270,6 @@ def ParseNoiseList(rir_list, noise_list_file):
                 for j in range(len(rir_list)):
                     if noise.rir_file == rir_list[j].rir_file_location:
                         id = j
-                        print(noise.rir_file)
                         rir_list[id].iso_noise_list.append(noise)
                         break;
                 if id == -1:
@@ -276,6 +281,7 @@ def ParseNoiseList(rir_list, noise_list_file):
 
 def Main():
     args = GetArgs()
+    random.seed(args.random_seed)
     rir_list = ParseRirList(args.rir_list_file)
     noise_list = []
     if args.noise_list_file is not None:
@@ -283,11 +289,12 @@ def Main():
         print("Number of point-source noises is {0}".format(len(noise_list)))
     room_list = MakeRoomList(rir_list)
 
-    MakeCorruption(input_dir = args.input_dir,
+    CreateReverberatedCopy(input_dir = args.input_dir,
                    output_dir = args.output_dir,
                    room_list = room_list,
                    noise_list = noise_list,
-                   snr_string = args.snr_string,
+                   foreground_snr_string = args.foreground_snr_string,
+                   background_snr_string = args.background_snr_string,
                    num_replica = args.num_replica,
                    prefix = args.prefix,
                    speech_rvb_probability = args.speech_rvb_probability,

From 99b4210cd403a53a620e5a7e7387ae2d38bb146e Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Thu, 5 May 2016 05:34:38 -0400
Subject: [PATCH 03/14] Pick the RIRs and noises according to assigned
 probabilities.

---
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 134 ++++++++++++------
 1 file changed, 94 insertions(+), 40 deletions(-)

diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index b06035de2a6..a7d887e5f06 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -83,6 +83,40 @@ def CheckArgs(args):
 
     return args
 
+
+def PickItemFromDict(dict):
+   total_p = sum(dict[key].probability for key in dict.keys())
+   p = random.uniform(0, total_p)
+   upto = 0
+   for key in dict.keys():
+      if upto + dict[key].probability >= p:
+         return dict[key]
+      upto += dict[key].probability
+   assert False, "Shouldn't get here"
+
+
+def PickItemFromList(list):
+   total_p = sum(item.probability for item in list)
+   p = random.uniform(0, total_p)
+   upto = 0
+   for item in list:
+      if upto + item.probability >= p:
+         return item
+      upto += item.probability
+   assert False, "Shouldn't get here"
+
+
+def weighted_choice(choices):
+   total = sum(w for c, w in choices)
+   r = random.uniform(0, total)
+   upto = 0
+   for c, w in choices:
+      if upto + w >= r:
+         return c
+      upto += w
+   assert False, "Shouldn't get here"
+
+
 def ParseFileToDict(file, assert2fields = False, value_processor = None):
     if value_processor is None:
         value_processor = lambda x: x[0]
@@ -98,11 +132,7 @@ def ParseFileToDict(file, assert2fields = False, value_processor = None):
 
 
 # This is the major function to generate pipeline command for the corruption
-def CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, foreground_snr_array, background_snr_array, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
-    rooms = list_cyclic_iterator(room_list)
-    noises = None
-    if len(noise_list) > 0:
-        noises = list_cyclic_iterator(noise_list)
+def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
     foreground_snrs = list_cyclic_iterator(foreground_snr_array)
     background_snrs = list_cyclic_iterator(background_snr_array)
     command_list = []
@@ -119,28 +149,29 @@ def CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, foreground
                 wav_id = prefix + str(i) + "_" + wav_id
 
             # pick the room
-            room = rooms.next()
+            room = PickItemFromDict(room_dict)
             command_opts = ""
             noises_added = []
             snrs_added = []
             start_times_added = []
             if random.random() < speech_rvb_probability:
                 # pick the RIR to reverberate the speech
-                speech_rir = room['rir_list'][random.randint(0,len(room['rir_list'])-1)]
+                speech_rir = PickItemFromList(room.rir_list)
                 command_opts += "--impulse-response={0} ".format(speech_rir.rir_file_location)
                 # add the corresponding isotropic noise if there is any
                 if len(speech_rir.iso_noise_list) > 0:
-                    isotropic_noise = speech_rir.iso_noise_list[random.randint(0,len(speech_rir.iso_noise_list)-1)]
+                    isotropic_noise = PickItemFromList(speech_rir.iso_noise_list)
                     # extend the isotropic noise to the length of the speech waveform
                     noises_added.append("\"wav-reverberate --duration={1} {0} - |\" ".format(isotropic_noise.noise_file_location, speech_dur))
                     snrs_added.append(background_snrs.next())
                     start_times_added.append(0)
 
-            if noises is not None and random.random() < noise_adding_probability:
+            # Add the point-source noise
+            if len(noise_list) > 0 and random.random() < noise_adding_probability:
                 for k in range(random.randint(1, max_noises_added)):
                     # pick the RIR to reverberate the point-source noise
-                    noise = noises.next()
-                    noise_rir = room['rir_list'][random.randint(0,len(room['rir_list'])-1)]
+                    noise = PickItemFromList(noise_list)
+                    noise_rir = PickItemFromList(room.rir_list)
                     if noise.bg_fg_type == "background": 
                         start_times_added.append(0)
                         noises_added.append("\"wav-reverberate --duration={2} --impulse-response={1} {0} - |\" ".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
@@ -186,7 +217,7 @@ def AddPrefixToFields(input_file, output_file, num_replica, prefix, field = [0])
     f.close()
 
 
-def CreateReverberatedCopy(input_dir, output_dir, room_list, noise_list, foreground_snr_string, background_snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
+def CreateReverberatedCopy(input_dir, output_dir, room_dict, noise_list, foreground_snr_string, background_snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
     
     if not os.path.isfile(input_dir + "/reco2dur"):
         print("Getting the duration of the recordings...");
@@ -196,7 +227,7 @@ def CreateReverberatedCopy(input_dir, output_dir, room_list, noise_list, foregro
     foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':'))
     background_snr_array = map(lambda x: float(x), background_snr_string.split(':'))
 
-    CorruptWav(wav_scp, durations, output_dir, room_list, noise_list, foreground_snr_array, background_snr_array, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added)
+    CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added)
 
     AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replica, prefix, field = [0,1])
     train_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
@@ -212,6 +243,21 @@ def CreateReverberatedCopy(input_dir, output_dir, room_list, noise_list, foregro
     train_lib.RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}"
                     .format(output_dir = output_dir))
 
+def SmoothProbability(list):
+    uniform_probability = 1 / float(len(list))
+    for item in list:
+        if item.probability is None:
+            item.probability = uniform_probability
+        else:
+            # smooth the probability
+            item.probability = 0.3 * item.probability + 0.7 * uniform_probability
+
+    sum_p = sum(item.probability for item in list)
+    # Normalize the probability
+    for item in list:
+        item.probability = item.probability / sum_p
+
+    return list
 
 def ParseRirList(rir_list_file):
     rir_parser = argparse.ArgumentParser()
@@ -221,6 +267,7 @@ def ParseRirList(rir_list_file):
     rir_parser.add_argument('--source-position-id', type=str, default=None, help='source position id')
     rir_parser.add_argument('--rt60', type=float, default=None, help='RT60 is the time required for reflections of a direct sound to decay 60 dB.')
     rir_parser.add_argument('--drr', type=float, default=None, help='Direct-to-reverberant-ratio of the impulse.')
+    rir_parser.add_argument('--probability', type=float, default=None, help='probability of the impulse.')
     rir_parser.add_argument('rir_file_location', type=str, help='rir file location')
 
     rir_list = []
@@ -228,26 +275,26 @@ def ParseRirList(rir_list_file):
     for line in rir_lines:
         rir = rir_parser.parse_args(line.split())
         setattr(rir, "iso_noise_list", [])
-        rir.iso_noise_list = []
         rir_list.append(rir)
 
-    return rir_list
+    return SmoothProbability(rir_list)
 
-def MakeRoomList(rir_list):
-    room_list = []
-    for i in range(len(rir_list)):
-        id = -1
-        for j in range(len(room_list)):
-            if room_list[j]['room_id'] == rir_list[i].room_id:
-                id = j
-                break
-        if id == -1:
+
+def MakeRoomDict(rir_list):
+    room_dict = {}
+    for rir in rir_list:
+        if rir.room_id not in room_dict:
             # add new room
-            room_list.append({'room_id': rir_list[i].room_id, 'rir_list': []})
+            room_dict[rir.room_id] = lambda: None
+            setattr(room_dict[rir.room_id], "rir_list", [])
+            setattr(room_dict[rir.room_id], "probability", 0)
+        room_dict[rir.room_id].rir_list.append(rir)
+
+    for key in room_dict.keys():
+        room_dict[key].probability = sum(rir.probability for rir in room_dict[key].rir_list)
 
-        room_list[id]['rir_list'].append(rir_list[i])
+    return room_dict
 
-    return room_list
 
 def ParseNoiseList(rir_list, noise_list_file):
     noise_parser = argparse.ArgumentParser()
@@ -255,29 +302,36 @@ def ParseNoiseList(rir_list, noise_list_file):
     noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
     noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise', choices = ["background", "foreground"])
     noise_parser.add_argument('--rir-file', type=str, default=None, help='compulsary if isotropic, should not be specified if point-source')
+    noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.')
     noise_parser.add_argument('noise_file_location', type=str, help='noise file location')
 
     point_noise_list = []
+    iso_noise_list = []
     noise_lines = map(lambda x: x.strip(), open(noise_list_file))
     for line in noise_lines:
         noise = noise_parser.parse_args(line.split())
         if noise.noise_type == "isotropic":
             if noise.rir_file is None:
-                raise Exception("--rir-file must be specified is --noise-type is point-source")
-                warnings.warn("No rir file specified for noise id {0}".format(noise.noise_id))
+                raise Exception("--rir-file must be specified if --noise-type is point-source")
             else:
-                id = -1
-                for j in range(len(rir_list)):
-                    if noise.rir_file == rir_list[j].rir_file_location:
-                        id = j
-                        rir_list[id].iso_noise_list.append(noise)
-                        break;
-                if id == -1:
-                    warnings.warn("Rir file specified for noise id {0} is not found in rir_list".format(noise.noise_id))
+                iso_noise_list.append(noise)
         else:
             point_noise_list.append(noise)
 
-    return (point_noise_list, rir_list)
+    iso_noise_list = SmoothProbability(iso_noise_list)
+
+    for iso_noise in iso_noise_list:
+        id = -1
+        for j in range(len(rir_list)):
+            if iso_noise.rir_file == rir_list[j].rir_file_location:
+                id = j
+                rir_list[id].iso_noise_list.append(noise)
+                break;
+        if id == -1:
+            warnings.warn("Rir file specified for noise id {0} is not found in rir_list".format(iso_noise.noise_id))
+
+    return (SmoothProbability(point_noise_list), rir_list)
+
 
 def Main():
     args = GetArgs()
@@ -287,11 +341,11 @@ def Main():
     if args.noise_list_file is not None:
         noise_list, rir_list = ParseNoiseList(rir_list, args.noise_list_file)
         print("Number of point-source noises is {0}".format(len(noise_list)))
-    room_list = MakeRoomList(rir_list)
+    room_dict = MakeRoomDict(rir_list)
 
     CreateReverberatedCopy(input_dir = args.input_dir,
                    output_dir = args.output_dir,
-                   room_list = room_list,
+                   room_dict = room_dict,
                    noise_list = noise_list,
                    foreground_snr_string = args.foreground_snr_string,
                    background_snr_string = args.background_snr_string,

From 0b7f06c11e45c3f53c7e4b21cd88357e053014d1 Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Sat, 7 May 2016 20:22:49 -0400
Subject: [PATCH 04/14] Modify wav-reverberate.cc according to the new
 steps/data/reverberate_data_dir.py

---
 egs/wsj/s5/steps/data/reverberate_data_dir.py |   9 +-
 src/featbin/wav-reverberate.cc                | 240 ++++++++++++------
 2 files changed, 171 insertions(+), 78 deletions(-)

diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index a7d887e5f06..228eaebfba0 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -132,6 +132,9 @@ def ParseFileToDict(file, assert2fields = False, value_processor = None):
 
 
 # This is the major function to generate pipeline command for the corruption
+# The generic command of wav-reverberate will be like:
+# wav-reverberate --duration=t --impulse-response=rir.wav 
+# --additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' --start-times='s1,s2' input.wav output.wav
 def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
     foreground_snrs = list_cyclic_iterator(foreground_snr_array)
     background_snrs = list_cyclic_iterator(background_snr_array)
@@ -162,7 +165,7 @@ def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground
                 if len(speech_rir.iso_noise_list) > 0:
                     isotropic_noise = PickItemFromList(speech_rir.iso_noise_list)
                     # extend the isotropic noise to the length of the speech waveform
-                    noises_added.append("\"wav-reverberate --duration={1} {0} - |\" ".format(isotropic_noise.noise_file_location, speech_dur))
+                    noises_added.append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_file_location, speech_dur))
                     snrs_added.append(background_snrs.next())
                     start_times_added.append(0)
 
@@ -174,11 +177,11 @@ def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground
                     noise_rir = PickItemFromList(room.rir_list)
                     if noise.bg_fg_type == "background": 
                         start_times_added.append(0)
-                        noises_added.append("\"wav-reverberate --duration={2} --impulse-response={1} {0} - |\" ".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
+                        noises_added.append("wav-reverberate --duration={2} --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
                         snrs_added.append(background_snrs.next())
                     else:
                         start_times_added.append(round(random.random() * speech_dur, 2))
-                        noises_added.append("\"wav-reverberate --impulse-response={1} {0} - |\" ".format(noise.noise_file_location, noise_rir.rir_file_location))
+                        noises_added.append("wav-reverberate --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location))
                         snrs_added.append(foreground_snrs.next())
 
             if len(noises_added) > 0:
diff --git a/src/featbin/wav-reverberate.cc b/src/featbin/wav-reverberate.cc
index d7599c5ea3d..56e2a0eb4f6 100644
--- a/src/featbin/wav-reverberate.cc
+++ b/src/featbin/wav-reverberate.cc
@@ -28,7 +28,8 @@ namespace kaldi {
    This function is to repeatedly concatenate signal1 by itself 
    to match the length of signal2 and add the two signals together.
 */
-void AddVectorsOfUnequalLength(const Vector<BaseFloat> &signal1, Vector<BaseFloat> *signal2) {
+void AddVectorsOfUnequalLength(const Vector<BaseFloat> &signal1,
+                                     Vector<BaseFloat> *signal2) {
   for (int32 po = 0; po < signal2->Dim(); po += signal1.Dim()) {
     int32 block_length = signal1.Dim();
     if (signal2->Dim() - po < block_length) block_length = signal2->Dim() - po;
@@ -36,6 +37,18 @@ void AddVectorsOfUnequalLength(const Vector<BaseFloat> &signal1, Vector<BaseFloa
   }
 }
 
+/*
+   This function is to add signal1 to signal2 starting at the offset of signal2
+   This will not extend the length of signal2.
+*/
+void AddVectorsWithOffset(const Vector<BaseFloat> &signal1, int32 offset,
+                                             Vector<BaseFloat> *signal2) {
+  int32 add_length = std::min(signal2->Dim() - offset, signal1.Dim());
+  if (add_length > 0)
+    signal2->Range(offset, add_length).AddVec(1.0, signal1.Range(0, add_length));
+}
+
+
 BaseFloat MaxAbsolute(const Vector<BaseFloat> &vector) {
   return std::max(std::abs(vector.Max()), std::abs(vector.Min()));
 }
@@ -71,29 +84,44 @@ BaseFloat ComputeEarlyReverbEnergy(const Vector<BaseFloat> &rir, const Vector<Ba
 }
 
 /*
-   This is the core function to do reverberation and noise addition
-   on the given signal. The noise will be scaled before the addition
-   to match the given signal-to-noise ratio (SNR) and it will also concatenate
-   itself repeatedly to match the length of the signal.
+   This is the core function to do reverberation on the given signal.
    The input parameters to this function are the room impulse response,
-   the sampling frequency, the SNR(dB), the noise and the signal respectively.
+   the sampling frequency and the signal respectively.
 */
-void DoReverberation(const Vector<BaseFloat> &rir, BaseFloat samp_freq,
-                        BaseFloat snr_db, Vector<BaseFloat> *noise,
+float DoReverberation(const Vector<BaseFloat> &rir, BaseFloat samp_freq,
                         Vector<BaseFloat> *signal) {
-  if (noise->Dim()) {
-    float input_power = ComputeEarlyReverbEnergy(rir, *signal, samp_freq);
-    float noise_power = VecVec(*noise, *noise) / noise->Dim();
-    float scale_factor = sqrt(pow(10, -snr_db / 10) * input_power / noise_power);
-    noise->Scale(scale_factor);
-    KALDI_VLOG(1) << "Noise signal is being scaled with " << scale_factor
-                  << " to generate output with SNR " << snr_db << "db\n";
-  }
-
+  float signal_power = ComputeEarlyReverbEnergy(rir, *signal, samp_freq);
   FFTbasedBlockConvolveSignals(rir, signal);
+  return signal_power;
+}
 
-  if (noise->Dim() > 0) {
-    AddVectorsOfUnequalLength(*noise, signal);
+/*
+   The noise will be scaled before the addition
+   to match the given signal-to-noise ratio (SNR).
+*/
+void AddNoise(Vector<BaseFloat> *noise, BaseFloat snr_db,
+                BaseFloat time, BaseFloat samp_freq,
+                BaseFloat signal_power, Vector<BaseFloat> *signal) {
+  float noise_power = VecVec(*noise, *noise) / noise->Dim();
+  float scale_factor = sqrt(pow(10, -snr_db / 10) * signal_power / noise_power);
+  noise->Scale(scale_factor);
+  KALDI_VLOG(1) << "Noise signal is being scaled with " << scale_factor
+                << " to generate output with SNR " << snr_db << "db\n";
+  int32 offset = time * samp_freq;
+  AddVectorsWithOffset(*noise, offset, signal);
+}
+
+/*
+   This function converts comma-spearted string into float vector.
+*/
+void ReadCommaSeparatedCommand(const std::string &s,
+                                std::vector<BaseFloat> *v) {
+  std::vector<std::string> split_string;
+  SplitStringToVector(s, ",", true, &split_string);
+  for (size_t i = 0; i < split_string.size(); i++) {
+    float ret;
+    ConvertStringToReal(split_string[i], &ret);
+    v->push_back(ret);
   }
 }
 }
@@ -107,20 +135,24 @@ int main(int argc, char *argv[]) {
         "room-impulse response (rir_matrix) and additive noise distortions\n"
         "(specified by corresponding files).\n"
         "Usage:  wav-reverberate [options...] <wav-in-rxfilename> "
-        "<rir-rxfilename> <wav-out-wxfilename>\n"
+        "<wav-out-wxfilename>\n"
         "e.g.\n"
-        "wav-reverberate --noise-file=noise.wav \\\n"
-        "  input.wav rir.wav output.wav\n";
+        "wav-reverberate --duration=t --impulse-response=rir.wav "
+        "--additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' "
+        "--start-times='s1,s2' input.wav output.wav\n";
 
     ParseOptions po(usage);
-    std::string noise_file;
-    BaseFloat snr_db = 20;
+    std::string rir_file;
+    std::string additive_signals;
+    std::string snrs;
+    std::string start_times;
     bool multi_channel_output = false;
     int32 input_channel = 0;
     int32 rir_channel = 0;
     int32 noise_channel = 0;
     bool normalize_output = true;
     BaseFloat volume = 0;
+    BaseFloat duration = 0;
 
     po.Register("multi-channel-output", &multi_channel_output,
                 "Specifies if the output should be multi-channel or not");
@@ -133,14 +165,29 @@ int main(int argc, char *argv[]) {
     po.Register("noise-channel", &noise_channel,
                 "Specifies the channel of the noise file, "
                 "it will only be used when multi-channel-output is false");
-    po.Register("noise-file", &noise_file,
-                "File with additive noise");
-    po.Register("snr-db", &snr_db,
-                "Desired SNR(dB) of the output");
+    po.Register("impulse-response", &rir_file,
+                "File with the impulse response for reverberating the input wave");
+    po.Register("additive-signals", &additive_signals,
+                "A comma separated list of additive signals");
+    po.Register("snrs", &snrs,
+                "A comma separated list of SNRs. The additive signals will be "
+                "scaled according to these SNRs.");
+    po.Register("start-times", &start_times,
+                "A comma separated list of start times referring to the "
+                "input signal. The additive signals will be added to the "
+                "input signal starting at the offset. If the start time "
+                "exceed the length of the input signal, the addition will "
+                "be ignored.");
     po.Register("normalize-output", &normalize_output,
                 "If true, then after reverberating and "
                 "possibly adding noise, scale so that the signal "
                 "energy is the same as the original input signal.");
+    po.Register("duration", &duration,
+                "If nonzero, it specified the duration (secs) of the output "
+                "signal. If the duration t is less than the length of the "
+                "input signal, the first t secs of the signal is trimed, "
+                "otherwise, the signal will be repeated to"
+                "fulfill the duration specified.");
     po.Register("volume", &volume,
                 "If nonzero, a scaling factor on the signal that is applied "
                 "after reverberating and possibly adding noise. "
@@ -148,7 +195,7 @@ int main(int argc, char *argv[]) {
                 "if you had also specified --normalize-output=false.");
 
     po.Read(argc, argv);
-    if (po.NumArgs() != 3) {
+    if (po.NumArgs() != 2) {
       po.PrintUsage();
       exit(1);
     }
@@ -160,13 +207,14 @@ int main(int argc, char *argv[]) {
     }
 
     std::string input_wave_file = po.GetArg(1);
-    std::string rir_file = po.GetArg(2);
-    std::string output_wave_file = po.GetArg(3);
+    std::string output_wave_file = po.GetArg(2);
 
     WaveData input_wave;
     {
+      WaveHolder waveholder;
       Input ki(input_wave_file);
-      input_wave.Read(ki.Stream());
+      waveholder.Read(ki.Stream());
+      input_wave = waveholder.Value();
     }
 
     const Matrix<BaseFloat> &input_matrix = input_wave.Data();
@@ -178,45 +226,70 @@ int main(int argc, char *argv[]) {
                   << " #channel: " << num_input_channel;
     KALDI_ASSERT(input_channel < num_input_channel);
 
-    WaveData rir_wave;
-    {
-      Input ki(rir_file);
-      rir_wave.Read(ki.Stream());
-    }
-    const Matrix<BaseFloat> &rir_matrix = rir_wave.Data();
-    BaseFloat samp_freq_rir = rir_wave.SampFreq();
-    int32 num_samp_rir = rir_matrix.NumCols(),
-          num_rir_channel = rir_matrix.NumRows();
-    KALDI_VLOG(1) << "sampling frequency of rir: " << samp_freq_rir
-                  << " #samples: " << num_samp_rir
-                  << " #channel: " << num_rir_channel;
-    if (!multi_channel_output) {
-      KALDI_ASSERT(rir_channel < num_rir_channel);
-    }
-
-    Matrix<BaseFloat> noise_matrix;
-    if (!noise_file.empty()) {
-      WaveData noise_wave;
+    Matrix<BaseFloat> rir_matrix;
+    BaseFloat samp_freq_rir = samp_freq_input;
+    int32 num_samp_rir = 1,
+          num_rir_channel = 1;
+    if (!rir_file.empty()) {
+      WaveData rir_wave;
       {
-        Input ki(noise_file);
-        noise_wave.Read(ki.Stream());
+        WaveHolder waveholder;
+        Input ki(rir_file);
+        waveholder.Read(ki.Stream());
+        rir_wave = waveholder.Value();
       }
-      noise_matrix = noise_wave.Data();
-      BaseFloat samp_freq_noise = noise_wave.SampFreq();
-      int32 num_samp_noise = noise_matrix.NumCols(),
-            num_noise_channel = noise_matrix.NumRows();
-      KALDI_VLOG(1) << "sampling frequency of noise: " << samp_freq_noise
-                    << " #samples: " << num_samp_noise
-                    << " #channel: " << num_noise_channel;
-      if (multi_channel_output) {
-        KALDI_ASSERT(num_rir_channel == num_noise_channel);
-      } else {
-        KALDI_ASSERT(noise_channel < num_noise_channel);
+      rir_matrix = rir_wave.Data();
+      samp_freq_rir = rir_wave.SampFreq();
+      num_samp_rir = rir_matrix.NumCols();
+      num_rir_channel = rir_matrix.NumRows();
+      KALDI_VLOG(1) << "sampling frequency of rir: " << samp_freq_rir
+                    << " #samples: " << num_samp_rir
+                    << " #channel: " << num_rir_channel;
+      if (!multi_channel_output) {
+        KALDI_ASSERT(rir_channel < num_rir_channel);
+      }
+    }
+
+    std::vector<Matrix<BaseFloat> > additive_signal_matrices;
+    if (!additive_signals.empty()) {
+      std::vector<std::string> split_string;
+      SplitStringToVector(additive_signals, ",", true, &split_string);
+      for (size_t i = 0; i < split_string.size(); i++) {
+        WaveHolder waveholder;
+        Input ki(split_string[i]);
+        waveholder.Read(ki.Stream());
+        WaveData additive_signal_wave = waveholder.Value();
+        Matrix<BaseFloat> additive_signal_matrix = additive_signal_wave.Data();
+        BaseFloat samp_freq = additive_signal_wave.SampFreq();
+        KALDI_ASSERT(samp_freq == samp_freq_input);
+        int32 num_samp = additive_signal_matrix.NumCols(),
+              num_channel = additive_signal_matrix.NumRows();
+        KALDI_VLOG(1) << "sampling frequency of additive signal: " << samp_freq
+                      << " #samples: " << num_samp
+                      << " #channel: " << num_channel;
+        if (multi_channel_output) {
+          KALDI_ASSERT(num_rir_channel == num_channel);
+        } else {
+          KALDI_ASSERT(noise_channel < num_channel);
+        }
+
+        additive_signal_matrices.push_back(additive_signal_matrix);
       }
     }
 
+    std::vector<BaseFloat> snr_vector;
+    if (!snrs.empty()) {
+      ReadCommaSeparatedCommand(snrs, &snr_vector);
+    }
+
+    std::vector<BaseFloat> start_time_vector;
+    if (!start_times.empty()) {
+      ReadCommaSeparatedCommand(start_times, &start_time_vector);
+    }
+
     int32 num_output_channels = (multi_channel_output ? num_rir_channel : 1);
-    Matrix<BaseFloat> out_matrix(num_output_channels, num_samp_input);
+    int32 num_samp_output = (duration > 0 ? samp_freq_input * duration : num_samp_input);
+    Matrix<BaseFloat> out_matrix(num_output_channels, num_samp_output);
 
     for (int32 output_channel = 0; output_channel < num_output_channels; output_channel++) {
       Vector<BaseFloat> input(num_samp_input);
@@ -224,18 +297,26 @@ int main(int argc, char *argv[]) {
       float power_before_reverb = VecVec(input, input) / input.Dim();
 
       int32 this_rir_channel = (multi_channel_output ? output_channel : rir_channel);
-      Vector<BaseFloat> rir(num_samp_rir);
-      rir.CopyRowFromMat(rir_matrix, this_rir_channel);
-      rir.Scale(1.0 / (1 << 15));
 
-      Vector<BaseFloat> noise(0);
-      if (!noise_file.empty()) {
-        noise.Resize(noise_matrix.NumCols());
-        int32 this_noise_channel = (multi_channel_output ? output_channel : noise_channel);
-        noise.CopyRowFromMat(noise_matrix, this_noise_channel);
+      float early_energy = power_before_reverb;
+      if (!rir_file.empty()) {
+        Vector<BaseFloat> rir;
+        rir.Resize(num_samp_rir);
+        rir.CopyRowFromMat(rir_matrix, this_rir_channel);
+        rir.Scale(1.0 / (1 << 15));
+        early_energy = DoReverberation(rir, samp_freq_rir, &input);
       }
 
-      DoReverberation(rir, samp_freq_rir, snr_db, &noise, &input);
+      if (additive_signal_matrices.size() > 0) {
+        Vector<BaseFloat> noise(0);
+        int32 this_noise_channel = (multi_channel_output ? output_channel : noise_channel);
+        for (int32 i = 0; i < additive_signal_matrices.size(); i++) {
+          noise.Resize(additive_signal_matrices[i].NumCols());
+          noise.CopyRowFromMat(additive_signal_matrices[i], this_noise_channel);
+          AddNoise(&noise, snr_vector[i], start_time_vector[i],
+                    samp_freq_input, early_energy, &input);
+        }
+      }
 
       float power_after_reverb = VecVec(input, input) / input.Dim();
 
@@ -244,7 +325,16 @@ int main(int argc, char *argv[]) {
       else if (normalize_output)
         input.Scale(sqrt(power_before_reverb / power_after_reverb));
 
-      out_matrix.CopyRowFromVec(input, output_channel);
+      if (num_samp_output <= num_samp_input) {
+        // trim the signal from the start
+        out_matrix.CopyRowFromVec(input.Range(0, num_samp_output), output_channel);
+      } else {
+        // repeat the signal to fill up the duration
+        Vector<BaseFloat> extended_input(num_samp_output);
+        extended_input.SetZero();
+        AddVectorsOfUnequalLength(input, &extended_input);
+        out_matrix.CopyRowFromVec(extended_input, output_channel);
+      }
     }
 
     WaveData out_wave(samp_freq_input, out_matrix);

From 1068ec452fa77773284c30b03323329743c7a977 Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Mon, 11 Jul 2016 03:51:59 -0400
Subject: [PATCH 05/14] Change the functions in signal.cc to extend the length
 of the convolved signal (the correct length is original signal length
 + rir length - 1); add the shift option to wav-reverberate.cc

---
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 26 ++++++-------
 src/feat/signal.cc                            | 38 +++++++++++--------
 src/feat/signal.h                             |  7 ++++
 src/featbin/wav-reverberate.cc                | 31 ++++++++++++---
 4 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index 228eaebfba0..e2f05b25aa1 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -41,7 +41,7 @@ def GetArgs():
                         "--bg-fg-type <choices=(background|foreground), default=background> "
                         "--rir-file <str, compulsary if isotropic, should not be specified if point-source> "
                         "< location=(support Kaldi IO strings) >")
-    parser.add_argument("--num-replications", type=int, dest = "num_replica", default = 1,
+    parser.add_argument("--num-replications", type=int, dest = "num_replicas", default = 1,
                         help="Number of replicate to generated for the data")
     parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", default = '20:10:0', help='snrs for foreground noises')
     parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0', help='snrs for background noises')
@@ -77,7 +77,7 @@ def CheckArgs(args):
         if not os.path.isfile(args.noise_list_file):
             raise Exception(args.noise_list_file + "not found")
 
-    if args.num_replica > 1 and args.prefix is None:
+    if args.num_replicas > 1 and args.prefix is None:
         args.prefix = "rvb"
         warnings.warn("--prefix is set to 'rvb' as --num-replications is larger than 1.")
 
@@ -135,11 +135,11 @@ def ParseFileToDict(file, assert2fields = False, value_processor = None):
 # The generic command of wav-reverberate will be like:
 # wav-reverberate --duration=t --impulse-response=rir.wav 
 # --additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' --start-times='s1,s2' input.wav output.wav
-def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
+def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
     foreground_snrs = list_cyclic_iterator(foreground_snr_array)
     background_snrs = list_cyclic_iterator(background_snr_array)
     command_list = []
-    for i in range(num_replica):
+    for i in range(num_replicas):
         keys = wav_scp.keys()
         keys.sort()
         for wav_id in keys:
@@ -204,10 +204,10 @@ def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground
 
 
 # This function replicate the entries in files like segments, utt2spk, text
-def AddPrefixToFields(input_file, output_file, num_replica, prefix, field = [0]):
+def AddPrefixToFields(input_file, output_file, num_replicas, prefix, field = [0]):
     list = map(lambda x: x.strip(), open(input_file))
     f = open(output_file, "w")
-    for i in range(num_replica):
+    for i in range(num_replicas):
         for line in list:
             if len(line) > 0 and line[0] != ';':
                 split1 = line.split()
@@ -220,7 +220,7 @@ def AddPrefixToFields(input_file, output_file, num_replica, prefix, field = [0])
     f.close()
 
 
-def CreateReverberatedCopy(input_dir, output_dir, room_dict, noise_list, foreground_snr_string, background_snr_string, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
+def CreateReverberatedCopy(input_dir, output_dir, room_dict, noise_list, foreground_snr_string, background_snr_string, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
     
     if not os.path.isfile(input_dir + "/reco2dur"):
         print("Getting the duration of the recordings...");
@@ -230,18 +230,18 @@ def CreateReverberatedCopy(input_dir, output_dir, room_dict, noise_list, foregro
     foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':'))
     background_snr_array = map(lambda x: float(x), background_snr_string.split(':'))
 
-    CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replica, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added)
+    CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added)
 
-    AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replica, prefix, field = [0,1])
+    AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, prefix, field = [0,1])
     train_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
                     .format(output_dir = output_dir))
 
     if os.path.isfile(input_dir + "/text"):
-        AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replica, prefix, field =[0])
+        AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, prefix, field =[0])
     if os.path.isfile(input_dir + "/segments"):
-        AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replica, prefix, field = [0,1])
+        AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replicas, prefix, field = [0,1])
     if os.path.isfile(input_dir + "/reco2file_and_channel"):
-        AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replica, prefix, field = [0,1])
+        AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, prefix, field = [0,1])
 
     train_lib.RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}"
                     .format(output_dir = output_dir))
@@ -352,7 +352,7 @@ def Main():
                    noise_list = noise_list,
                    foreground_snr_string = args.foreground_snr_string,
                    background_snr_string = args.background_snr_string,
-                   num_replica = args.num_replica,
+                   num_replicas = args.num_replicas,
                    prefix = args.prefix,
                    speech_rvb_probability = args.speech_rvb_probability,
                    noise_adding_probability = args.noise_adding_probability,
diff --git a/src/feat/signal.cc b/src/feat/signal.cc
index e8fbb0b84cf..12a9a710092 100644
--- a/src/feat/signal.cc
+++ b/src/feat/signal.cc
@@ -34,31 +34,34 @@ void ElementwiseProductOfFft(const Vector<BaseFloat> &a, Vector<BaseFloat> *b) {
 void ConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
   int32 signal_length = signal->Dim();
   int32 filter_length = filter.Dim();
-  Vector<BaseFloat> signal_padded(signal_length + filter_length - 1);
+  int32 output_length = signal_length + filter_length - 1;
+  Vector<float> signal_padded(output_length);
   signal_padded.SetZero();
   for (int32 i = 0; i < signal_length; i++) {
     for (int32 j = 0; j < filter_length; j++) {
         signal_padded(i + j) += (*signal)(i) * filter(j);
     }
   }
-  signal->CopyFromVec(signal_padded.Range(0, signal_length));
+  signal->Resize(output_length);
+  signal->CopyFromVec(signal_padded);
 }
 
 
 void FFTbasedConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
   int32 signal_length = signal->Dim();
   int32 filter_length = filter.Dim();
+  int32 output_length = signal_length + filter_length - 1;
 
-  int32 fft_length = RoundUpToNearestPowerOfTwo(signal_length + filter_length - 1);
+  int32 fft_length = RoundUpToNearestPowerOfTwo(output_length);
   KALDI_VLOG(1) << "fft_length for full signal convolution is " << fft_length;
 
   SplitRadixRealFft<BaseFloat> srfft(fft_length);
 
-  Vector<BaseFloat> filter_padded(fft_length);
+  Vector<float> filter_padded(fft_length);
   filter_padded.Range(0, filter_length).CopyFromVec(filter);
   srfft.Compute(filter_padded.Data(), true);
 
-  Vector<BaseFloat> signal_padded(fft_length);
+  Vector<float> signal_padded(fft_length);
   signal_padded.Range(0, signal_length).CopyFromVec(*signal);
   srfft.Compute(signal_padded.Data(), true);
 
@@ -67,12 +70,15 @@ void FFTbasedConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat>
   srfft.Compute(signal_padded.Data(), false);
   signal_padded.Scale(1.0 / fft_length);
 
-  signal->CopyFromVec(signal_padded.Range(0, signal_length));
+  signal->Resize(output_length);
+  signal->CopyFromVec(signal_padded.Range(0, output_length));
 }
 
 void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
   int32 signal_length = signal->Dim();
   int32 filter_length = filter.Dim();
+  int32 output_length = signal_length + filter_length - 1;
+  signal->Resize(output_length, kCopyData);
 
   KALDI_VLOG(1) << "Length of the filter is " << filter_length;
 
@@ -83,17 +89,17 @@ void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFl
   KALDI_VLOG(1) << "Block size is " << block_length;
   SplitRadixRealFft<BaseFloat> srfft(fft_length);
 
-  Vector<BaseFloat> filter_padded(fft_length);
+  Vector<float> filter_padded(fft_length);
   filter_padded.Range(0, filter_length).CopyFromVec(filter);
   srfft.Compute(filter_padded.Data(), true);
 
-  Vector<BaseFloat> temp_pad(filter_length - 1);
+  Vector<float> temp_pad(filter_length - 1);
   temp_pad.SetZero();
-  Vector<BaseFloat> signal_block_padded(fft_length);
+  Vector<float> signal_block_padded(fft_length);
 
-  for (int32 po = 0; po < signal_length; po += block_length) {
+  for (int32 po = 0; po < output_length; po += block_length) {
     // get a block of the signal
-    int32 process_length = std::min(block_length, signal_length - po);
+    int32 process_length = std::min(block_length, output_length - po);
     signal_block_padded.SetZero();
     signal_block_padded.Range(0, process_length).CopyFromVec(signal->Range(po, process_length));
 
@@ -105,17 +111,17 @@ void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFl
     signal_block_padded.Scale(1.0 / fft_length);
 
     // combine the block
-    if (po + block_length < signal_length) {       // current block is not the last block
+    if (po + block_length < output_length) {       // current block is not the last block
       signal->Range(po, block_length).CopyFromVec(signal_block_padded.Range(0, block_length));
       signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad);
       temp_pad.CopyFromVec(signal_block_padded.Range(block_length, filter_length - 1));
     } else {
-      signal->Range(po, signal_length - po).CopyFromVec(
-                        signal_block_padded.Range(0, signal_length - po));
-      if (filter_length - 1 < signal_length - po)
+      signal->Range(po, output_length - po).CopyFromVec(
+                        signal_block_padded.Range(0, output_length - po));
+      if (filter_length - 1 < output_length - po)
         signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad);
       else
-        signal->Range(po, signal_length - po).AddVec(1.0, temp_pad.Range(0, signal_length - po));
+        signal->Range(po, output_length - po).AddVec(1.0, temp_pad.Range(0, output_length - po));
     }
   }
 }
diff --git a/src/feat/signal.h b/src/feat/signal.h
index 7ff0ce33b52..c6c3eb50530 100644
--- a/src/feat/signal.h
+++ b/src/feat/signal.h
@@ -25,6 +25,13 @@
 
 namespace kaldi {
 
+/* 
+   The following three functions have the same functionality but differ
+   in implementation and therefore in efficiency. After the convolution,
+   the length of the signal will be extended to (original signal length +
+   filter length - 1).
+*/
+
 /*
    This function implements a simple non-FFT-based convolution of two signals.
    It is suggested to use the FFT-based convolution function which is more
diff --git a/src/featbin/wav-reverberate.cc b/src/featbin/wav-reverberate.cc
index 56e2a0eb4f6..80b08307172 100644
--- a/src/featbin/wav-reverberate.cc
+++ b/src/featbin/wav-reverberate.cc
@@ -28,7 +28,7 @@ namespace kaldi {
    This function is to repeatedly concatenate signal1 by itself 
    to match the length of signal2 and add the two signals together.
 */
-void AddVectorsOfUnequalLength(const Vector<BaseFloat> &signal1,
+void AddVectorsOfUnequalLength(const VectorBase<BaseFloat> &signal1,
                                      Vector<BaseFloat> *signal2) {
   for (int32 po = 0; po < signal2->Dim(); po += signal1.Dim()) {
     int32 block_length = signal1.Dim();
@@ -87,6 +87,8 @@ BaseFloat ComputeEarlyReverbEnergy(const Vector<BaseFloat> &rir, const Vector<Ba
    This is the core function to do reverberation on the given signal.
    The input parameters to this function are the room impulse response,
    the sampling frequency and the signal respectively.
+   The length of the signal will be extended to (original signal length +
+   rir length - 1) after the reverberation.
 */
 float DoReverberation(const Vector<BaseFloat> &rir, BaseFloat samp_freq,
                         Vector<BaseFloat> *signal) {
@@ -147,6 +149,7 @@ int main(int argc, char *argv[]) {
     std::string snrs;
     std::string start_times;
     bool multi_channel_output = false;
+    bool shift_output = true;
     int32 input_channel = 0;
     int32 rir_channel = 0;
     int32 noise_channel = 0;
@@ -156,6 +159,14 @@ int main(int argc, char *argv[]) {
 
     po.Register("multi-channel-output", &multi_channel_output,
                 "Specifies if the output should be multi-channel or not");
+    po.Register("shift-output", &shift_output,
+                "If true, the reverberated waveform will be shifted by the "
+                "amount of the peak position of the RIR and the length of "
+                "the output waveform will be equal to the input waveform."
+                "If false, the length of the output waveform will be "
+                "equal to (original input length + rir length - 1). "
+                "This value is default true and "
+                "it only affects the output when RIR file is provided.");
     po.Register("input-wave-channel", &input_channel,
                 "Specifies the channel to be used from input as only a "
                 "single channel will be used to generate reverberated output");
@@ -228,8 +239,8 @@ int main(int argc, char *argv[]) {
 
     Matrix<BaseFloat> rir_matrix;
     BaseFloat samp_freq_rir = samp_freq_input;
-    int32 num_samp_rir = 1,
-          num_rir_channel = 1;
+    int32 num_samp_rir = 0,
+          num_rir_channel = 0;
     if (!rir_file.empty()) {
       WaveData rir_wave;
       {
@@ -287,8 +298,11 @@ int main(int argc, char *argv[]) {
       ReadCommaSeparatedCommand(start_times, &start_time_vector);
     }
 
+    int32 shift_index = 0;
     int32 num_output_channels = (multi_channel_output ? num_rir_channel : 1);
-    int32 num_samp_output = (duration > 0 ? samp_freq_input * duration : num_samp_input);
+    int32 num_samp_output = (duration > 0 ? samp_freq_input * duration :
+                              (shift_output ? num_samp_input :
+                                              num_samp_input + num_samp_rir - 1));
     Matrix<BaseFloat> out_matrix(num_output_channels, num_samp_output);
 
     for (int32 output_channel = 0; output_channel < num_output_channels; output_channel++) {
@@ -305,6 +319,11 @@ int main(int argc, char *argv[]) {
         rir.CopyRowFromMat(rir_matrix, this_rir_channel);
         rir.Scale(1.0 / (1 << 15));
         early_energy = DoReverberation(rir, samp_freq_rir, &input);
+        if (shift_output) {
+          // find the position of the peak of the impulse response 
+          // and shift the output waveform by this amount
+          rir.Max(&shift_index);
+        }
       }
 
       if (additive_signal_matrices.size() > 0) {
@@ -327,12 +346,12 @@ int main(int argc, char *argv[]) {
 
       if (num_samp_output <= num_samp_input) {
         // trim the signal from the start
-        out_matrix.CopyRowFromVec(input.Range(0, num_samp_output), output_channel);
+        out_matrix.CopyRowFromVec(input.Range(shift_index, num_samp_output), output_channel);
       } else {
         // repeat the signal to fill up the duration
         Vector<BaseFloat> extended_input(num_samp_output);
         extended_input.SetZero();
-        AddVectorsOfUnequalLength(input, &extended_input);
+        AddVectorsOfUnequalLength(input.Range(shift_index, num_samp_input), &extended_input);
         out_matrix.CopyRowFromVec(extended_input, output_channel);
       }
     }

From fdb576dff7d1940f333b6ee05f92a2d98319669f Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Wed, 13 Jul 2016 01:20:45 -0400
Subject: [PATCH 06/14] Adding more comments and remove duplicate function in
 reverberate_data_dir.py

---
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 62 ++++++++-----------
 1 file changed, 25 insertions(+), 37 deletions(-)

diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index e2f05b25aa1..6f4418e7aca 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -25,9 +25,11 @@ def GetArgs():
     # we add compulsary arguments as named arguments for readability
     parser = argparse.ArgumentParser(description="Reverberate the data directory with an option "
                                                  "to add isotropic and point source noiseis. "
-                                                 "This script only deals with single channel wave files. "
-                                                 "If multi-channel noise/rir/speech files are provided one "
-                                                 "of the channels will be randomly picked",
+                                                 "Usage: reverberate_data_dir.py [options...] <in-data-dir> <out-data-dir> "
+                                                 "E.g. reverberate_data_dir.py --rir-list-file rir_list "
+                                                 "--foreground-snrs 20:10:15:5:0 --background-snrs 20:10:15:5:0 "
+                                                 "--noise-list-file noise_list --speech-rvb-probability 1 --num-replications 2 "
+                                                 "--random-seed 1 data/train data/train_rvb",
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
     parser.add_argument("--rir-list-file", type=str, required = True, 
@@ -84,37 +86,21 @@ def CheckArgs(args):
     return args
 
 
-def PickItemFromDict(dict):
-   total_p = sum(dict[key].probability for key in dict.keys())
+# This function pick the item according to the associated probability
+# The input could be either a dictinoary of a list
+def PickItemWithProbability(x):
+   if isinstance(x, dict):
+     plist = list(set(x.values()))
+   else:
+     plist = x
+   total_p = sum(item.probability for item in plist)
    p = random.uniform(0, total_p)
-   upto = 0
-   for key in dict.keys():
-      if upto + dict[key].probability >= p:
-         return dict[key]
-      upto += dict[key].probability
-   assert False, "Shouldn't get here"
-
-
-def PickItemFromList(list):
-   total_p = sum(item.probability for item in list)
-   p = random.uniform(0, total_p)
-   upto = 0
-   for item in list:
-      if upto + item.probability >= p:
+   accumulate_p = 0
+   for item in plist:
+      if accumulate_p + item.probability >= p:
          return item
-      upto += item.probability
-   assert False, "Shouldn't get here"
-
-
-def weighted_choice(choices):
-   total = sum(w for c, w in choices)
-   r = random.uniform(0, total)
-   upto = 0
-   for c, w in choices:
-      if upto + w >= r:
-         return c
-      upto += w
-   assert False, "Shouldn't get here"
+      accumulate_p += item.probability
+   assert False, "Shouldn't get here as the accumulated probability should always equal to 1"
 
 
 def ParseFileToDict(file, assert2fields = False, value_processor = None):
@@ -152,18 +138,18 @@ def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground
                 wav_id = prefix + str(i) + "_" + wav_id
 
             # pick the room
-            room = PickItemFromDict(room_dict)
+            room = PickItemWithProbability(room_dict)
             command_opts = ""
             noises_added = []
             snrs_added = []
             start_times_added = []
             if random.random() < speech_rvb_probability:
                 # pick the RIR to reverberate the speech
-                speech_rir = PickItemFromList(room.rir_list)
+                speech_rir = PickItemWithProbability(room.rir_list)
                 command_opts += "--impulse-response={0} ".format(speech_rir.rir_file_location)
                 # add the corresponding isotropic noise if there is any
                 if len(speech_rir.iso_noise_list) > 0:
-                    isotropic_noise = PickItemFromList(speech_rir.iso_noise_list)
+                    isotropic_noise = PickItemWithProbability(speech_rir.iso_noise_list)
                     # extend the isotropic noise to the length of the speech waveform
                     noises_added.append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_file_location, speech_dur))
                     snrs_added.append(background_snrs.next())
@@ -173,8 +159,8 @@ def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground
             if len(noise_list) > 0 and random.random() < noise_adding_probability:
                 for k in range(random.randint(1, max_noises_added)):
                     # pick the RIR to reverberate the point-source noise
-                    noise = PickItemFromList(noise_list)
-                    noise_rir = PickItemFromList(room.rir_list)
+                    noise = PickItemWithProbability(noise_list)
+                    noise_rir = PickItemWithProbability(room.rir_list)
                     if noise.bg_fg_type == "background": 
                         start_times_added.append(0)
                         noises_added.append("wav-reverberate --duration={2} --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
@@ -283,6 +269,8 @@ def ParseRirList(rir_list_file):
     return SmoothProbability(rir_list)
 
 
+# This function crate the room dictinoary from the rir list
+# The key of the returned dictionary is the room id
 def MakeRoomDict(rir_list):
     room_dict = {}
     for rir in rir_list:

From 3802fde4b4da335c1482da6519f3f8d58145200f Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Thu, 14 Jul 2016 03:29:02 -0400
Subject: [PATCH 07/14] Change option --max-noises-added to
 --max-noises-per-minute

---
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index 6f4418e7aca..3f6f04fb340 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -49,11 +49,11 @@ def GetArgs():
     parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0', help='snrs for background noises')
     parser.add_argument('--prefix', type=str, default = None, help='prefix for the id of the corrupted utterances')
     parser.add_argument("--speech-rvb-probability", type=float, default = 0.8,
-                        help="Probability of reverberating the speech signal, e.g. 0 <= p <= 1")
+                        help="Probability of reverberating a speech signal, e.g. 0 <= p <= 1")
     parser.add_argument("--noise-adding-probability", type=float, default = 0.4,
                         help="Probability of adding point-source noises, e.g. 0 <= p <= 1")
-    parser.add_argument("--max-noises-added", type=int, default = 2,
-                        help="Maximum number of point-source noises could be added")
+    parser.add_argument("--max-noises-per-minute", type=int, default = 2,
+                        help="This controls the maximum number of point-source noises that could be added to a recording according to its duration")
     parser.add_argument('--random-seed', type=int, default=0, help='seed to be used in the randomization of impulese and noises')
     parser.add_argument("input_dir",
                         help="Input data directory")
@@ -121,7 +121,7 @@ def ParseFileToDict(file, assert2fields = False, value_processor = None):
 # The generic command of wav-reverberate will be like:
 # wav-reverberate --duration=t --impulse-response=rir.wav 
 # --additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' --start-times='s1,s2' input.wav output.wav
-def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
+def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_per_minute):
     foreground_snrs = list_cyclic_iterator(foreground_snr_array)
     background_snrs = list_cyclic_iterator(background_snr_array)
     command_list = []
@@ -134,6 +134,7 @@ def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground
             if len(wav_pipe.split()) == 1:
                 wav_pipe = "cat {0} |".format(wav_pipe)
             speech_dur = durations[wav_id]
+            max_noises_recording = math.floor(max_noises_per_minute * speech_dur / 60)
             if prefix is not None:
                 wav_id = prefix + str(i) + "_" + wav_id
 
@@ -157,7 +158,7 @@ def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground
 
             # Add the point-source noise
             if len(noise_list) > 0 and random.random() < noise_adding_probability:
-                for k in range(random.randint(1, max_noises_added)):
+                for k in range(random.randint(1, max_noises_recording)):
                     # pick the RIR to reverberate the point-source noise
                     noise = PickItemWithProbability(noise_list)
                     noise_rir = PickItemWithProbability(room.rir_list)
@@ -206,7 +207,7 @@ def AddPrefixToFields(input_file, output_file, num_replicas, prefix, field = [0]
     f.close()
 
 
-def CreateReverberatedCopy(input_dir, output_dir, room_dict, noise_list, foreground_snr_string, background_snr_string, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added):
+def CreateReverberatedCopy(input_dir, output_dir, room_dict, noise_list, foreground_snr_string, background_snr_string, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_per_minute):
     
     if not os.path.isfile(input_dir + "/reco2dur"):
         print("Getting the duration of the recordings...");
@@ -216,7 +217,7 @@ def CreateReverberatedCopy(input_dir, output_dir, room_dict, noise_list, foregro
     foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':'))
     background_snr_array = map(lambda x: float(x), background_snr_string.split(':'))
 
-    CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_added)
+    CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_per_minute)
 
     AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, prefix, field = [0,1])
     train_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
@@ -344,7 +345,7 @@ def Main():
                    prefix = args.prefix,
                    speech_rvb_probability = args.speech_rvb_probability,
                    noise_adding_probability = args.noise_adding_probability,
-                   max_noises_added = args.max_noises_added)
+                   max_noises_per_minute = args.max_noises_per_minute)
 
 if __name__ == "__main__":
     Main()

From 970def5d4387fb3f4331b28f8dd1d0d123318c1c Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Fri, 15 Jul 2016 12:12:51 -0400
Subject: [PATCH 08/14] Adding data_lib.py; adding more comments, splitting
 large function in reverberate_data_dir.py

---
 egs/wsj/s5/steps/data/data_lib.py             |  23 ++
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 287 ++++++++++++------
 src/featbin/wav-reverberate.cc                |   2 +-
 3 files changed, 211 insertions(+), 101 deletions(-)
 create mode 100644 egs/wsj/s5/steps/data/data_lib.py

diff --git a/egs/wsj/s5/steps/data/data_lib.py b/egs/wsj/s5/steps/data/data_lib.py
new file mode 100644
index 00000000000..52aa83cae81
--- /dev/null
+++ b/egs/wsj/s5/steps/data/data_lib.py
@@ -0,0 +1,23 @@
+import subprocess
+#import logging
+#import math
+#import re
+#import time
+#import argparse
+
+def RunKaldiCommand(command, wait = True):
+    """ Runs a command line through the shell (Kaldi commands are usually
+        pipelines, hence shell=True).  If wait is True, returns (stdout, stderr)
+        and raises Exception on a non-zero exit status; else returns the Popen. """
+    p = subprocess.Popen(command, shell = True,
+                         stdout = subprocess.PIPE,
+                         stderr = subprocess.PIPE)
+
+    if wait:
+        [stdout, stderr] = p.communicate()
+        if p.returncode != 0:
+            raise Exception("There was an error while running the command {0}\n".format(command)+"-"*10+"\n"+stderr)
+        return stdout, stderr
+    else:
+        return p
+
diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index 3f6f04fb340..92114b23614 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -7,19 +7,7 @@
 from __future__ import print_function
 import argparse, glob, math, os, random, sys, warnings, copy, imp, ast
 
-train_lib = imp.load_source('ntl', 'steps/nnet3/nnet3_train_lib.py')
-
-class list_cyclic_iterator:
-  def __init__(self, list):
-    self.list_index = 0
-    self.list = list
-    random.shuffle(self.list)
-
-  def next(self):
-    item = self.list[self.list_index]
-    self.list_index = (self.list_index + 1) % len(self.list)
-    return item
-
+data_lib = imp.load_source('ntl', 'steps/data/data_lib.py')
 
 def GetArgs():
     # we add compulsary arguments as named arguments for readability
@@ -50,8 +38,10 @@ def GetArgs():
     parser.add_argument('--prefix', type=str, default = None, help='prefix for the id of the corrupted utterances')
     parser.add_argument("--speech-rvb-probability", type=float, default = 0.8,
                         help="Probability of reverberating a speech signal, e.g. 0 <= p <= 1")
-    parser.add_argument("--noise-adding-probability", type=float, default = 0.4,
+    parser.add_argument("--pointsource-noise-addition-probability", type=float, default = 0.4,
                         help="Probability of adding point-source noises, e.g. 0 <= p <= 1")
+    parser.add_argument("--isotropic-noise-addition-probability", type=float, default = 0.4,
+                        help="Probability of adding isotropic noises, e.g. 0 <= p <= 1")
     parser.add_argument("--max-noises-per-minute", type=int, default = 2,
                         help="This controls the maximum number of point-source noises that could be added to a recording according to its duration")
     parser.add_argument('--random-seed', type=int, default=0, help='seed to be used in the randomization of impulese and noises')
@@ -86,6 +76,18 @@ def CheckArgs(args):
     return args
 
 
+class list_cyclic_iterator:
+  def __init__(self, list):
+    self.list_index = 0
+    self.list = list
+    random.shuffle(self.list)
+
+  def next(self):
+    item = self.list[self.list_index]
+    self.list_index = (self.list_index + 1) % len(self.list)
+    return item
+
+
 # This function pick the item according to the associated probability
 # The input could be either a dictinoary of a list
 def PickItemWithProbability(x):
@@ -116,78 +118,144 @@ def ParseFileToDict(file, assert2fields = False, value_processor = None):
         dict[parts[0]] = value_processor(parts[1:])
     return dict
 
+def WriteDictToFile(dict, file_name):
+    file = open(file_name, 'w')
+    keys = dict.keys()
+    keys.sort()
+    for key in keys:
+        value = dict[key]
+        if type(value) in [list, tuple] :
+            if type(value) is tuple:
+                value = list(value)
+            value.sort()
+            value = ' '.join(value)
+        file.write('{0}\t{1}\n'.format(key, value))
+    file.close()
+
+
+# This function returns only the isotropic noises according to the specified RIR id
+def FilterIsotropicNoiseList(iso_noise_list, rir_id):
+    filtered_list = []
+    for noise in iso_noise_list:
+        if noise.rir_id == rir_id:
+            filtered_list.append(noise)
+
+    return filtered_list
+
+def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+                            point_noise_list, # the point source noise list
+                            iso_noise_list, # the isotropic noise list
+                            foreground_snrs, # the SNR for adding the foreground noises
+                            background_snrs, # the SNR for adding the background noises
+                            speech_rvb_probability, # Probability of reverberating a speech signal
+                            isotropic_noise_addition_probability, # Probability of adding isotropic noises
+                            pointsource_noise_addition_probability, # Probability of adding point-source noises
+                            speech_dur,  # duration of the recording
+                            max_noises_recording  # Maximum number of point-source noises that can be added
+                            ):
+    reverberate_opts = ""
+    noises_added = []
+    snrs_added = []
+    start_times_added = []
+    # Randomly select the room
+    room = PickItemWithProbability(room_dict)
+    # Randomly select the RIR in the room
+    speech_rir = PickItemWithProbability(room.rir_list)
+    if random.random() < speech_rvb_probability:
+        # pick the RIR to reverberate the speech
+        reverberate_opts += "--impulse-response={0} ".format(speech_rir.rir_file_location)
+
+    rir_iso_noise_list = FilterIsotropicNoiseList(iso_noise_list, speech_rir.rir_id)
+    # Add the corresponding isotropic noise associated with the selected RIR
+    if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability:
+        isotropic_noise = PickItemWithProbability(rir_iso_noise_list)
+        # extend the isotropic noise to the length of the speech waveform
+        noises_added.append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_file_location, speech_dur))
+        snrs_added.append(background_snrs.next())
+        start_times_added.append(0)
+
+    # Add the point-source noise
+    if len(point_noise_list) > 0 and random.random() < pointsource_noise_addition_probability:
+        for k in range(random.randint(1, max_noises_recording)):
+            # pick the RIR to reverberate the point-source noise
+            noise = PickItemWithProbability(point_noise_list)
+            noise_rir = PickItemWithProbability(room.rir_list)
+            if noise.bg_fg_type == "background":
+                start_times_added.append(0)
+                noises_added.append("wav-reverberate --duration={2} --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
+                snrs_added.append(background_snrs.next())
+            else:
+                start_times_added.append(round(random.random() * speech_dur, 2))
+                noises_added.append("wav-reverberate --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location))
+                snrs_added.append(foreground_snrs.next())
+
+    assert len(noises_added) == len(snrs_added)
+    assert len(noises_added) == len(start_times_added)
+
+    if len(noises_added) > 0:
+        reverberate_opts += "--additive-signals='{0}' ".format(','.join(noises_added))
+        reverberate_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x),snrs_added)))
+        reverberate_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x),start_times_added)))
 
-# This is the major function to generate pipeline command for the corruption
+    return reverberate_opts
+
+# This is the main function to generate pipeline command for the corruption
 # The generic command of wav-reverberate will be like:
 # wav-reverberate --duration=t --impulse-response=rir.wav 
 # --additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' --start-times='s1,s2' input.wav output.wav
-def CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_per_minute):
+def CorruptWav(wav_scp,  # the dictionary of which elements are the IO of the speech recordings
+               durations, # the dictionary of which elements are the duration (in sec) of the speech recordings
+               output_dir, # output directory to write the corrupted wav.scp 
+               room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+               point_noise_list, # the point source noise list
+               iso_noise_list, # the isotropic noise list
+               foreground_snr_array, # the SNR for adding the foreground noises
+               background_snr_array, # the SNR for adding the background noises
+               num_replicas, # Number of replicate to generated for the data
+               prefix, # prefix for the id of the corrupted utterances
+               speech_rvb_probability, # Probability of reverberating a speech signal
+               isotropic_noise_addition_probability, # Probability of adding isotropic noises
+               pointsource_noise_addition_probability, # Probability of adding point-source noises
+               max_noises_per_minute # maximum number of point-source noises that can be added to a recording according to its duration
+               ):
     foreground_snrs = list_cyclic_iterator(foreground_snr_array)
     background_snrs = list_cyclic_iterator(background_snr_array)
-    command_list = []
+    corrupted_wav_scp = {}
     for i in range(num_replicas):
         keys = wav_scp.keys()
         keys.sort()
         for wav_id in keys:
-            wav_pipe = wav_scp[wav_id]
+            wav_original_pipe = wav_scp[wav_id]
             # check if it is really a pipe
-            if len(wav_pipe.split()) == 1:
-                wav_pipe = "cat {0} |".format(wav_pipe)
+            if len(wav_original_pipe.split()) == 1:
+                wav_original_pipe = "cat {0} |".format(wav_original_pipe)
             speech_dur = durations[wav_id]
             max_noises_recording = math.floor(max_noises_per_minute * speech_dur / 60)
             if prefix is not None:
-                wav_id = prefix + str(i) + "_" + wav_id
-
-            # pick the room
-            room = PickItemWithProbability(room_dict)
-            command_opts = ""
-            noises_added = []
-            snrs_added = []
-            start_times_added = []
-            if random.random() < speech_rvb_probability:
-                # pick the RIR to reverberate the speech
-                speech_rir = PickItemWithProbability(room.rir_list)
-                command_opts += "--impulse-response={0} ".format(speech_rir.rir_file_location)
-                # add the corresponding isotropic noise if there is any
-                if len(speech_rir.iso_noise_list) > 0:
-                    isotropic_noise = PickItemWithProbability(speech_rir.iso_noise_list)
-                    # extend the isotropic noise to the length of the speech waveform
-                    noises_added.append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_file_location, speech_dur))
-                    snrs_added.append(background_snrs.next())
-                    start_times_added.append(0)
-
-            # Add the point-source noise
-            if len(noise_list) > 0 and random.random() < noise_adding_probability:
-                for k in range(random.randint(1, max_noises_recording)):
-                    # pick the RIR to reverberate the point-source noise
-                    noise = PickItemWithProbability(noise_list)
-                    noise_rir = PickItemWithProbability(room.rir_list)
-                    if noise.bg_fg_type == "background": 
-                        start_times_added.append(0)
-                        noises_added.append("wav-reverberate --duration={2} --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
-                        snrs_added.append(background_snrs.next())
-                    else:
-                        start_times_added.append(round(random.random() * speech_dur, 2))
-                        noises_added.append("wav-reverberate --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location))
-                        snrs_added.append(foreground_snrs.next())
-
-            if len(noises_added) > 0:
-                command_opts += "--additive-signals='{0}' ".format(','.join(noises_added))
-            if len(snrs_added) > 0:
-                command_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x),snrs_added)))
-            if len(start_times_added) > 0:
-                command_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x),start_times_added)))
+                new_wav_id = prefix + str(i) + "_" + wav_id
+            else:
+                new_wav_id = wav_id
+
+            reverberate_opts = GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+                                                         point_noise_list, # the point source noise list
+                                                         iso_noise_list, # the isotropic noise list
+                                                         foreground_snrs, # the SNR for adding the foreground noises
+                                                         background_snrs, # the SNR for adding the background noises
+                                                         speech_rvb_probability, # Probability of reverberating a speech signal
+                                                         isotropic_noise_addition_probability, # Probability of adding isotropic noises
+                                                         pointsource_noise_addition_probability, # Probability of adding point-source noises
+                                                         speech_dur,  # duration of the recording
+                                                         max_noises_recording  # Maximum number of point-source noises that can be added
+                                                         )       
             
-            if command_opts == "":
-                command = "{0} {1}\n".format(wav_id, wav_pipe) 
+            if reverberate_opts == "":
+                wav_corrupted_pipe = "{0}".format(wav_original_pipe) 
             else:
-                command = "{0} {1} wav-reverberate {2} - - |\n".format(wav_id, wav_pipe, command_opts)
+                wav_corrupted_pipe = "{0} wav-reverberate {1} - - |".format(wav_original_pipe, reverberate_opts)
 
-            command_list.append(command)
+            corrupted_wav_scp[new_wav_id] = wav_corrupted_pipe
 
-    file_handle = open(output_dir + "/wav.scp", 'w')
-    file_handle.write("".join(command_list))
-    file_handle.close()
+    WriteDictToFile(corrupted_wav_scp, output_dir + "/wav.scp")
 
 
 # This function replicate the entries in files like segments, utt2spk, text
@@ -207,20 +275,37 @@ def AddPrefixToFields(input_file, output_file, num_replicas, prefix, field = [0]
     f.close()
 
 
-def CreateReverberatedCopy(input_dir, output_dir, room_dict, noise_list, foreground_snr_string, background_snr_string, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_per_minute):
+# This function creates multiple copies of the necessary files, e.g. utt2spk, wav.scp ...
+def CreateReverberatedCopy(input_dir,
+                           output_dir,
+                           room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+                           point_noise_list, # the point source noise list
+                           iso_noise_list, # the isotropic noise list
+                           foreground_snr_string, # the SNR for adding the foreground noises
+                           background_snr_string, # the SNR for adding the background noises
+                           num_replicas, # Number of replicate to generated for the data
+                           prefix, # prefix for the id of the corrupted utterances
+                           speech_rvb_probability, # Probability of reverberating a speech signal
+                           isotropic_noise_addition_probability, # Probability of adding isotropic noises
+                           pointsource_noise_addition_probability, # Probability of adding point-source noises
+                           max_noises_per_minute  # maximum number of point-source noises that can be added to a recording according to its duration
+                           ):
     
     if not os.path.isfile(input_dir + "/reco2dur"):
         print("Getting the duration of the recordings...");
-        train_lib.RunKaldiCommand("wav-to-duration --read-entire-file=true scp:{0}/wav.scp ark,t:{0}/reco2dur".format(input_dir))
+        data_lib.RunKaldiCommand("wav-to-duration --read-entire-file=true scp:{0}/wav.scp ark,t:{0}/reco2dur".format(input_dir))
     durations = ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0]))
     wav_scp = ParseFileToDict(input_dir + "/wav.scp", value_processor = lambda x: " ".join(x))
     foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':'))
     background_snr_array = map(lambda x: float(x), background_snr_string.split(':'))
 
-    CorruptWav(wav_scp, durations, output_dir, room_dict, noise_list, foreground_snr_array, background_snr_array, num_replicas, prefix, speech_rvb_probability, noise_adding_probability, max_noises_per_minute)
+    CorruptWav(wav_scp, durations, output_dir, room_dict, point_noise_list, iso_noise_list, 
+               foreground_snr_array, background_snr_array, num_replicas, prefix, 
+               speech_rvb_probability, isotropic_noise_addition_probability, 
+               pointsource_noise_addition_probability, max_noises_per_minute)
 
     AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, prefix, field = [0,1])
-    train_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
+    data_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
                     .format(output_dir = output_dir))
 
     if os.path.isfile(input_dir + "/text"):
@@ -230,10 +315,12 @@ def CreateReverberatedCopy(input_dir, output_dir, room_dict, noise_list, foregro
     if os.path.isfile(input_dir + "/reco2file_and_channel"):
         AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, prefix, field = [0,1])
 
-    train_lib.RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}"
+    data_lib.RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}"
                     .format(output_dir = output_dir))
 
-def SmoothProbability(list):
+
+# This function smooths the probability distribution in the list
+def SmoothProbabilityDistribution(list):
     uniform_probability = 1 / float(len(list))
     for item in list:
         if item.probability is None:
@@ -242,13 +329,15 @@ def SmoothProbability(list):
             # smooth the probability
             item.probability = 0.3 * item.probability + 0.7 * uniform_probability
 
-    sum_p = sum(item.probability for item in list)
     # Normalize the probability
+    sum_p = sum(item.probability for item in list)
     for item in list:
         item.probability = item.probability / sum_p
 
     return list
 
+# This function creates the RIR list 
+# Each item in the list contains the following arguments
 def ParseRirList(rir_list_file):
     rir_parser = argparse.ArgumentParser()
     rir_parser.add_argument('--rir-id', type=str, required=True, help='rir id')
@@ -267,11 +356,14 @@ def ParseRirList(rir_list_file):
         setattr(rir, "iso_noise_list", [])
         rir_list.append(rir)
 
-    return SmoothProbability(rir_list)
+    return SmoothProbabilityDistribution(rir_list)
 
 
-# This function crate the room dictinoary from the rir list
-# The key of the returned dictionary is the room id
+# This function divides the global RIR list into local lists
+# according to the room where the RIRs are generated
+# It returns the room dictionary indexed by the room id
+# Each element in the room dictionary contains a local RIR list 
+# and the probability of the corresponding room
 def MakeRoomDict(rir_list):
     room_dict = {}
     for rir in rir_list:
@@ -282,18 +374,21 @@ def MakeRoomDict(rir_list):
             setattr(room_dict[rir.room_id], "probability", 0)
         room_dict[rir.room_id].rir_list.append(rir)
 
+    # the probability of the room is the sum of the probabilities of its RIRs
     for key in room_dict.keys():
         room_dict[key].probability = sum(rir.probability for rir in room_dict[key].rir_list)
 
     return room_dict
 
-
-def ParseNoiseList(rir_list, noise_list_file):
+# This function creates the point-source noise list 
+# and the isotropic noise list from the noise information file
+# Each item in the list contains the following arguments
+def ParseNoiseList(noise_list_file):
     noise_parser = argparse.ArgumentParser()
     noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
     noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
     noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise', choices = ["background", "foreground"])
-    noise_parser.add_argument('--rir-file', type=str, default=None, help='compulsary if isotropic, should not be specified if point-source')
+    noise_parser.add_argument('--rir-id', type=str, default=None, help='compulsary if isotropic, should not be specified if point-source')
     noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.')
     noise_parser.add_argument('noise_file_location', type=str, help='noise file location')
 
@@ -303,26 +398,15 @@ def ParseNoiseList(rir_list, noise_list_file):
     for line in noise_lines:
         noise = noise_parser.parse_args(line.split())
         if noise.noise_type == "isotropic":
-            if noise.rir_file is None:
-                raise Exception("--rir-file must be specified if --noise-type is point-source")
+            if noise.rir_id is None:
+                raise Exception("--rir-id must be specified if --noise-type is isotropic")
             else:
                 iso_noise_list.append(noise)
         else:
             point_noise_list.append(noise)
 
-    iso_noise_list = SmoothProbability(iso_noise_list)
-
-    for iso_noise in iso_noise_list:
-        id = -1
-        for j in range(len(rir_list)):
-            if iso_noise.rir_file == rir_list[j].rir_file_location:
-                id = j
-                rir_list[id].iso_noise_list.append(noise)
-                break;
-        if id == -1:
-            warnings.warn("Rir file specified for noise id {0} is not found in rir_list".format(iso_noise.noise_id))
-
-    return (SmoothProbability(point_noise_list), rir_list)
+    return (SmoothProbabilityDistribution(point_noise_list),
+            SmoothProbabilityDistribution(iso_noise_list))
 
 
 def Main():
@@ -331,20 +415,23 @@ def Main():
     rir_list = ParseRirList(args.rir_list_file)
     noise_list = []
     if args.noise_list_file is not None:
-        noise_list, rir_list = ParseNoiseList(rir_list, args.noise_list_file)
-        print("Number of point-source noises is {0}".format(len(noise_list)))
+        point_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file)
+        print("Number of point-source noises is {0}".format(len(point_noise_list)))
+        print("Number of isotropic noises is {0}".format(len(iso_noise_list)))
     room_dict = MakeRoomDict(rir_list)
 
     CreateReverberatedCopy(input_dir = args.input_dir,
                    output_dir = args.output_dir,
                    room_dict = room_dict,
-                   noise_list = noise_list,
+                   point_noise_list = point_noise_list,
+                   iso_noise_list = iso_noise_list,
                    foreground_snr_string = args.foreground_snr_string,
                    background_snr_string = args.background_snr_string,
                    num_replicas = args.num_replicas,
                    prefix = args.prefix,
                    speech_rvb_probability = args.speech_rvb_probability,
-                   noise_adding_probability = args.noise_adding_probability,
+                   isotropic_noise_addition_probability = args.isotropic_noise_addition_probability,
+                   pointsource_noise_addition_probability = args.pointsource_noise_addition_probability,
                    max_noises_per_minute = args.max_noises_per_minute)
 
 if __name__ == "__main__":
diff --git a/src/featbin/wav-reverberate.cc b/src/featbin/wav-reverberate.cc
index 80b08307172..683b8be6177 100644
--- a/src/featbin/wav-reverberate.cc
+++ b/src/featbin/wav-reverberate.cc
@@ -165,7 +165,7 @@ int main(int argc, char *argv[]) {
                 "the output waveform will be equal to the input waveform."
                 "If false, the length of the output waveform will be "
                 "equal to (original input length + rir length - 1). "
-                "This value is default true and "
+                "This value is true by default and "
                 "it only affects the output when RIR file is provided.");
     po.Register("input-wave-channel", &input_channel,
                 "Specifies the channel to be used from input as only a "

From d335c718e3b1849afe40a61fc436c0e098c6a36e Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Tue, 19 Jul 2016 03:38:19 -0400
Subject: [PATCH 09/14] adding AddPointSourceNoise()

---
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 111 +++++++++++-------
 1 file changed, 71 insertions(+), 40 deletions(-)

diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index 92114b23614..53d3aa44973 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -36,11 +36,11 @@ def GetArgs():
     parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", default = '20:10:0', help='snrs for foreground noises')
     parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0', help='snrs for background noises')
     parser.add_argument('--prefix', type=str, default = None, help='prefix for the id of the corrupted utterances')
-    parser.add_argument("--speech-rvb-probability", type=float, default = 0.8,
+    parser.add_argument("--speech-rvb-probability", type=float, default = 1.0,
                         help="Probability of reverberating a speech signal, e.g. 0 <= p <= 1")
-    parser.add_argument("--pointsource-noise-addition-probability", type=float, default = 0.4,
+    parser.add_argument("--pointsource-noise-addition-probability", type=float, default = 1.0,
                         help="Probability of adding point-source noises, e.g. 0 <= p <= 1")
-    parser.add_argument("--isotropic-noise-addition-probability", type=float, default = 0.4,
+    parser.add_argument("--isotropic-noise-addition-probability", type=float, default = 1.0,
                         help="Probability of adding isotropic noises, e.g. 0 <= p <= 1")
     parser.add_argument("--max-noises-per-minute", type=int, default = 2,
                         help="This controls the maximum number of point-source noises that could be added to a recording according to its duration")
@@ -88,8 +88,9 @@ def next(self):
     return item
 
 
-# This function pick the item according to the associated probability
-# The input could be either a dictinoary of a list
+# This function picks an item from the collection according to the associated probability distribution.
+# The probability estimate of each item in the collection is stored in the "probability" field of 
+# the particular item. x : a collection (list or dictionary) where the values contain a field called probability
 def PickItemWithProbability(x):
    if isinstance(x, dict):
      plist = list(set(x.values()))
@@ -105,6 +106,8 @@ def PickItemWithProbability(x):
    assert False, "Shouldn't get here as the accumulated probability should always equal to 1"
 
 
+# This function parses a file and packs the data into a dictionary
+# It is useful for parsing files like wav.scp, utt2spk, text, etc.
 def ParseFileToDict(file, assert2fields = False, value_processor = None):
     if value_processor is None:
         value_processor = lambda x: x[0]
@@ -118,6 +121,7 @@ def ParseFileToDict(file, assert2fields = False, value_processor = None):
         dict[parts[0]] = value_processor(parts[1:])
     return dict
 
+# This function creates a file and writes the content of a dictionary into it
 def WriteDictToFile(dict, file_name):
     file = open(file_name, 'w')
     keys = dict.keys()
@@ -134,6 +138,7 @@ def WriteDictToFile(dict, file_name):
 
 
 # This function returns only the isotropic noises according to the specified RIR id
+# Please refer to ParseNoiseList() for the format of iso_noise_list
 def FilterIsotropicNoiseList(iso_noise_list, rir_id):
     filtered_list = []
     for noise in iso_noise_list:
@@ -142,6 +147,33 @@ def FilterIsotropicNoiseList(iso_noise_list, rir_id):
 
     return filtered_list
 
+
+def AddPointSourceNoise(noise_addition_descriptor,  # descriptor to store the information of the noise added
+                        room,  # the room selected
+                        point_noise_list, # the point source noise list
+                        pointsource_noise_addition_probability, # Probability of adding point-source noises
+                        foreground_snrs, # the SNR for adding the foreground noises
+                        background_snrs, # the SNR for adding the background noises
+                        speech_dur,  # duration of the recording
+                        max_noises_recording  # Maximum number of point-source noises that can be added
+                        ):
+    if len(point_noise_list) > 0 and random.random() < pointsource_noise_addition_probability:
+        for k in range(random.randint(1, max_noises_recording)):
+            # pick the RIR to reverberate the point-source noise
+            noise = PickItemWithProbability(point_noise_list)
+            noise_rir = PickItemWithProbability(room.rir_list)
+            if noise.bg_fg_type == "background":
+                noise_addition_descriptor['noise_io'].append("wav-reverberate --duration={2} --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
+                noise_addition_descriptor['start_times'].append(0)
+                noise_addition_descriptor['snrs'].append(background_snrs.next())
+            else:
+                noise_addition_descriptor['noise_io'].append("wav-reverberate --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location))
+                noise_addition_descriptor['start_times'].append(round(random.random() * speech_dur, 2))
+                noise_addition_descriptor['snrs'].append(foreground_snrs.next())
+
+    return noise_addition_descriptor
+
+
 def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
                             point_noise_list, # the point source noise list
                             iso_noise_list, # the isotropic noise list
@@ -154,9 +186,9 @@ def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to
                             max_noises_recording  # Maximum number of point-source noises that can be added
                             ):
     reverberate_opts = ""
-    noises_added = []
-    snrs_added = []
-    start_times_added = []
+    noise_addition_descriptor = {'noise_io': [],
+                                 'start_times': [],
+                                 'snrs': []}
     # Randomly select the room
     room = PickItemWithProbability(room_dict)
     # Randomly select the RIR in the room
@@ -170,35 +202,30 @@ def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to
     if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability:
         isotropic_noise = PickItemWithProbability(rir_iso_noise_list)
         # extend the isotropic noise to the length of the speech waveform
-        noises_added.append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_file_location, speech_dur))
-        snrs_added.append(background_snrs.next())
-        start_times_added.append(0)
-
-    # Add the point-source noise
-    if len(point_noise_list) > 0 and random.random() < pointsource_noise_addition_probability:
-        for k in range(random.randint(1, max_noises_recording)):
-            # pick the RIR to reverberate the point-source noise
-            noise = PickItemWithProbability(point_noise_list)
-            noise_rir = PickItemWithProbability(room.rir_list)
-            if noise.bg_fg_type == "background":
-                start_times_added.append(0)
-                noises_added.append("wav-reverberate --duration={2} --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
-                snrs_added.append(background_snrs.next())
-            else:
-                start_times_added.append(round(random.random() * speech_dur, 2))
-                noises_added.append("wav-reverberate --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location))
-                snrs_added.append(foreground_snrs.next())
-
-    assert len(noises_added) == len(snrs_added)
-    assert len(noises_added) == len(start_times_added)
-
-    if len(noises_added) > 0:
-        reverberate_opts += "--additive-signals='{0}' ".format(','.join(noises_added))
-        reverberate_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x),snrs_added)))
-        reverberate_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x),start_times_added)))
+        noise_addition_descriptor['noise_io'].append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_file_location, speech_dur))
+        noise_addition_descriptor['start_times'].append(0)
+        noise_addition_descriptor['snrs'].append(background_snrs.next())
+
+    noise_addition_descriptor = AddPointSourceNoise(noise_addition_descriptor,  # descriptor to store the information of the noise added
+                                                    room,  # the room selected
+                                                    point_noise_list, # the point source noise list
+                                                    pointsource_noise_addition_probability, # Probability of adding point-source noises
+                                                    foreground_snrs, # the SNR for adding the foreground noises
+                                                    background_snrs, # the SNR for adding the background noises
+                                                    speech_dur,  # duration of the recording
+                                                    max_noises_recording  # Maximum number of point-source noises that can be added
+                                                    )
+
+    assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['start_times'])
+    assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['snrs'])
+    if len(noise_addition_descriptor['noise_io']) > 0:
+        reverberate_opts += "--additive-signals='{0}' ".format(','.join(noise_addition_descriptor['noise_io']))
+        reverberate_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['start_times'])))
+        reverberate_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['snrs'])))
 
     return reverberate_opts
 
+
 # This is the main function to generate pipeline command for the corruption
 # The generic command of wav-reverberate will be like:
 # wav-reverberate --duration=t --impulse-response=rir.wav 
@@ -320,14 +347,14 @@ def CreateReverberatedCopy(input_dir,
 
 
 # This function smooths the probability distribution in the list
-def SmoothProbabilityDistribution(list):
+def SmoothProbabilityDistribution(list, smoothing_weight=0.3):
     uniform_probability = 1 / float(len(list))
     for item in list:
         if item.probability is None:
             item.probability = uniform_probability
         else:
             # smooth the probability
-            item.probability = 0.3 * item.probability + 0.7 * uniform_probability
+            item.probability = (1 - smoothing_weight) * item.probability + smoothing_weight * uniform_probability
 
     # Normalize the probability
     sum_p = sum(item.probability for item in list)
@@ -337,11 +364,13 @@ def SmoothProbabilityDistribution(list):
     return list
 
 # This function creates the RIR list 
-# Each item in the list contains the following arguments
+# Each RIR item in the list contains the following attributes:
+# rir_id, room_id, receiver_position_id, source_position_id, rt60, drr, probability
+# Please refer to the help messages in the parser for the meaning of these attributes
 def ParseRirList(rir_list_file):
     rir_parser = argparse.ArgumentParser()
-    rir_parser.add_argument('--rir-id', type=str, required=True, help='rir id')
-    rir_parser.add_argument('--room-id', type=str, required=True, help='room id')
+    rir_parser.add_argument('--rir-id', type=str, required=True, help='This id is unique for each RIR and the noise may associate with a particular RIR by refering to this id')
+    rir_parser.add_argument('--room-id', type=str, required=True, help='This is the room that where the RIR is generated')
     rir_parser.add_argument('--receiver-position-id', type=str, default=None, help='receiver position id')
     rir_parser.add_argument('--source-position-id', type=str, default=None, help='source position id')
     rir_parser.add_argument('--rt60', type=float, default=None, help='RT60 is the time required for reflections of a direct sound to decay 60 dB.')
@@ -382,7 +411,9 @@ def MakeRoomDict(rir_list):
 
 # This function creates the point-source noise list 
 # and the isotropic noise list from the noise information file
-# Each item in the list contains the following arguments
+# Each noise item in the list contains the following attributes:
+# noise_id, noise_type, bg_fg_type, rir_id, probability, noise_file_location
+# Please refer to the help messages in the parser for the meaning of these attributes
 def ParseNoiseList(noise_list_file):
     noise_parser = argparse.ArgumentParser()
     noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')

From ebfba00a1d4742c50db9ffb57504d885dc033c63 Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Wed, 20 Jul 2016 23:51:42 -0400
Subject: [PATCH 10/14] Fixing spelling mistake and modifying comments

---
 ...ta_lib.py => data_dir_manipulation_lib.py} |  5 -
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 99 +++++++++++--------
 2 files changed, 56 insertions(+), 48 deletions(-)
 rename egs/wsj/s5/steps/data/{data_lib.py => data_dir_manipulation_lib.py} (90%)

diff --git a/egs/wsj/s5/steps/data/data_lib.py b/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py
similarity index 90%
rename from egs/wsj/s5/steps/data/data_lib.py
rename to egs/wsj/s5/steps/data/data_dir_manipulation_lib.py
index 52aa83cae81..1f7253d4891 100644
--- a/egs/wsj/s5/steps/data/data_lib.py
+++ b/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py
@@ -1,9 +1,4 @@
 import subprocess
-#import logging
-#import math
-#import re
-#import time
-#import argparse
 
 def RunKaldiCommand(command, wait = True):
     """ Runs commands frequently seen in Kaldi scripts. These are usually a
diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index 53d3aa44973..f9a6617fe00 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -7,12 +7,12 @@
 from __future__ import print_function
 import argparse, glob, math, os, random, sys, warnings, copy, imp, ast
 
-data_lib = imp.load_source('ntl', 'steps/data/data_lib.py')
+data_lib = imp.load_source('dml', 'steps/data/data_dir_manipulation_lib.py')
 
 def GetArgs():
-    # we add compulsary arguments as named arguments for readability
+    # we add required arguments as named arguments for readability
     parser = argparse.ArgumentParser(description="Reverberate the data directory with an option "
-                                                 "to add isotropic and point source noiseis. "
+                                                 "to add isotropic and point source noises. "
                                                  "Usage: reverberate_data_dir.py [options...] <in-data-dir> <out-data-dir> "
                                                  "E.g. reverberate_data_dir.py --rir-list-file rir_list "
                                                  "--foreground-snrs 20:10:15:5:0 --background-snrs 20:10:15:5:0 "
@@ -22,20 +22,24 @@ def GetArgs():
 
     parser.add_argument("--rir-list-file", type=str, required = True, 
                         help="RIR information file, the format of the file is "
-                        "--rir-id <string,compulsary> --room-id <string,compulsary> "
+                        "--rir-id <string,required> --room-id <string,required> "
                         "--receiver-position-id <string,optional> --source-position-id <string,optional> "
-                        "--rt-60 < <float,optional> --drr <float, optional> < location(support Kaldi IO strings) >")
+                        "--rt-60 <float,optional> --drr <float, optional> <location(support Kaldi IO strings)> "
+                        "E.g. --rir-id 00001 --room-id 001 --receiver-position-id 001 --source-position-id 00001 "
+                        "--rt60 0.58 --drr -4.885 data/impulses/Room001-00001.wav")
     parser.add_argument("--noise-list-file", type=str, default = None,
                         help="Noise information file, the format of the file is"
-                        "--noise-id <string,compulsary> --noise-type <choices = (isotropic, point source),compulsary> "
-                        "--bg-fg-type <choices=(background|foreground), default=background> "
-                        "--rir-file <str, compulsary if isotropic, should not be specified if point-source> "
-                        "< location=(support Kaldi IO strings) >")
+                        "--noise-id <string,required> --noise-type <choices = {isotropic, point source},required> "
+                        "--bg-fg-type <choices = {background, foreground}, default=background> "
+                        "--rir-file <str, specifies the rir file associated with the noise file. Required if isotropic "
+                        "as the rir file links this noise file to a specific position in the room> "
+                        "<location=(support Kaldi IO strings)> "
+                        "E.g. --noise-id 001 --noise-type isotropic --rir-id 00019 iso_noise.wav")
     parser.add_argument("--num-replications", type=int, dest = "num_replicas", default = 1,
                         help="Number of replicate to generated for the data")
-    parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", default = '20:10:0', help='snrs for foreground noises')
-    parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0', help='snrs for background noises')
-    parser.add_argument('--prefix', type=str, default = None, help='prefix for the id of the corrupted utterances')
+    parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", default = '20:10:0', help='When foreground noises are being added the script will iterate through these SNRs.')
+    parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0', help='When background noises are being added the script will iterate through these SNRs.')
+    parser.add_argument('--prefix', type=str, default = None, help='This prefix will modified for each reverberated copy, by adding additional affixes.')
     parser.add_argument("--speech-rvb-probability", type=float, default = 1.0,
                         help="Probability of reverberating a speech signal, e.g. 0 <= p <= 1")
     parser.add_argument("--pointsource-noise-addition-probability", type=float, default = 1.0,
@@ -44,7 +48,7 @@ def GetArgs():
                         help="Probability of adding isotropic noises, e.g. 0 <= p <= 1")
     parser.add_argument("--max-noises-per-minute", type=int, default = 2,
                         help="This controls the maximum number of point-source noises that could be added to a recording according to its duration")
-    parser.add_argument('--random-seed', type=int, default=0, help='seed to be used in the randomization of impulese and noises')
+    parser.add_argument('--random-seed', type=int, default=0, help='seed to be used in the randomization of impulses and noises')
     parser.add_argument("input_dir",
                         help="Input data directory")
     parser.add_argument("output_dir",
@@ -132,7 +136,7 @@ def WriteDictToFile(dict, file_name):
             if type(value) is tuple:
                 value = list(value)
             value.sort()
-            value = ' '.join(value)
+            value = ' '.join(str(v) for v in value)  # stringify each element, not the repr of the whole list
         file.write('{0}\t{1}\n'.format(key, value))
     file.close()
 
@@ -150,18 +154,20 @@ def FilterIsotropicNoiseList(iso_noise_list, rir_id):
 
 def AddPointSourceNoise(noise_addition_descriptor,  # descriptor to store the information of the noise added
                         room,  # the room selected
-                        point_noise_list, # the point source noise list
+                        pointsource_noise_list, # the point source noise list
                         pointsource_noise_addition_probability, # Probability of adding point-source noises
                         foreground_snrs, # the SNR for adding the foreground noises
                         background_snrs, # the SNR for adding the background noises
                         speech_dur,  # duration of the recording
                         max_noises_recording  # Maximum number of point-source noises that can be added
                         ):
-    if len(point_noise_list) > 0 and random.random() < pointsource_noise_addition_probability:
+    if len(pointsource_noise_list) > 0 and random.random() < pointsource_noise_addition_probability:
         for k in range(random.randint(1, max_noises_recording)):
             # pick the RIR to reverberate the point-source noise
-            noise = PickItemWithProbability(point_noise_list)
+            noise = PickItemWithProbability(pointsource_noise_list)
             noise_rir = PickItemWithProbability(room.rir_list)
+            # If it is a background noise, the noise will be extended and added to the whole speech;
+            # if it is a foreground noise, the noise will not be extended and will be added at a random time of the speech
             if noise.bg_fg_type == "background":
                 noise_addition_descriptor['noise_io'].append("wav-reverberate --duration={2} --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
                 noise_addition_descriptor['start_times'].append(0)
@@ -175,7 +181,7 @@ def AddPointSourceNoise(noise_addition_descriptor,  # descriptor to store the in
 
 
 def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
-                            point_noise_list, # the point source noise list
+                            pointsource_noise_list, # the point source noise list
                             iso_noise_list, # the isotropic noise list
                             foreground_snrs, # the SNR for adding the foreground noises
                             background_snrs, # the SNR for adding the background noises
@@ -190,6 +196,7 @@ def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to
                                  'start_times': [],
                                  'snrs': []}
     # Randomly select the room
+    # Here the room probability is a sum of the probabilities of the RIRs recorded in the room.
     room = PickItemWithProbability(room_dict)
     # Randomly select the RIR in the room
     speech_rir = PickItemWithProbability(room.rir_list)
@@ -208,7 +215,7 @@ def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to
 
     noise_addition_descriptor = AddPointSourceNoise(noise_addition_descriptor,  # descriptor to store the information of the noise added
                                                     room,  # the room selected
-                                                    point_noise_list, # the point source noise list
+                                                    pointsource_noise_list, # the point source noise list
                                                     pointsource_noise_addition_probability, # Probability of adding point-source noises
                                                     foreground_snrs, # the SNR for adding the foreground noises
                                                     background_snrs, # the SNR for adding the background noises
@@ -225,16 +232,26 @@ def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to
 
     return reverberate_opts
 
+# This function derives the id used in a replicated copy of the data dir.
+# With a prefix the result is <prefix><copy>_<id>, e.g. GetNewId("utt1", "rvb", 2)
+# gives "rvb2_utt1"; without a prefix the id is returned unchanged.
+def GetNewId(id, prefix=None, copy=0):
+    if prefix is None:
+        # nothing to prepend
+        return id
+
+    return prefix + str(copy) + "_" + id
+    
 
 # This is the main function to generate pipeline command for the corruption
 # The generic command of wav-reverberate will be like:
 # wav-reverberate --duration=t --impulse-response=rir.wav 
 # --additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' --start-times='s1,s2' input.wav output.wav
-def CorruptWav(wav_scp,  # the dictionary of which elements are the IO of the speech recordings
-               durations, # the dictionary of which elements are the duration (in sec) of the speech recordings
+def GenerateReverberatedWavScp(wav_scp,  # a dictionary whose values are the Kaldi-IO strings of the speech recordings
+               durations, # a dictionary whose values are the duration (in sec) of the speech recordings
                output_dir, # output directory to write the corrupted wav.scp 
                room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
-               point_noise_list, # the point source noise list
+               pointsource_noise_list, # the point source noise list
                iso_noise_list, # the isotropic noise list
                foreground_snr_array, # the SNR for adding the foreground noises
                background_snr_array, # the SNR for adding the background noises
@@ -251,20 +268,16 @@ def CorruptWav(wav_scp,  # the dictionary of which elements are the IO of the sp
     for i in range(num_replicas):
         keys = wav_scp.keys()
         keys.sort()
-        for wav_id in keys:
-            wav_original_pipe = wav_scp[wav_id]
+        for recording_id in keys:
+            wav_original_pipe = wav_scp[recording_id]
             # check if it is really a pipe
             if len(wav_original_pipe.split()) == 1:
                 wav_original_pipe = "cat {0} |".format(wav_original_pipe)
-            speech_dur = durations[wav_id]
+            speech_dur = durations[recording_id]
             max_noises_recording = math.floor(max_noises_per_minute * speech_dur / 60)
-            if prefix is not None:
-                new_wav_id = prefix + str(i) + "_" + wav_id
-            else:
-                new_wav_id = wav_id
 
             reverberate_opts = GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
-                                                         point_noise_list, # the point source noise list
+                                                         pointsource_noise_list, # the point source noise list
                                                          iso_noise_list, # the isotropic noise list
                                                          foreground_snrs, # the SNR for adding the foreground noises
                                                          background_snrs, # the SNR for adding the background noises
@@ -280,7 +293,8 @@ def CorruptWav(wav_scp,  # the dictionary of which elements are the IO of the sp
             else:
                 wav_corrupted_pipe = "{0} wav-reverberate {1} - - |".format(wav_original_pipe, reverberate_opts)
 
-            corrupted_wav_scp[new_wav_id] = wav_corrupted_pipe
+            new_recording_id = GetNewId(recording_id, prefix, i)
+            corrupted_wav_scp[new_recording_id] = wav_corrupted_pipe
 
     WriteDictToFile(corrupted_wav_scp, output_dir + "/wav.scp")
 
@@ -294,8 +308,7 @@ def AddPrefixToFields(input_file, output_file, num_replicas, prefix, field = [0]
             if len(line) > 0 and line[0] != ';':
                 split1 = line.split()
                 for j in field:
-                    if prefix is not None:
-                        split1[j] = prefix + str(i) + "_" + split1[j]
+                    split1[j] = GetNewId(split1[j], prefix, i)
                 print(" ".join(split1), file=f)
             else:
                 print(line, file=f)
@@ -306,7 +319,7 @@ def AddPrefixToFields(input_file, output_file, num_replicas, prefix, field = [0]
 def CreateReverberatedCopy(input_dir,
                            output_dir,
                            room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
-                           point_noise_list, # the point source noise list
+                           pointsource_noise_list, # the point source noise list
                            iso_noise_list, # the isotropic noise list
                            foreground_snr_string, # the SNR for adding the foreground noises
                            background_snr_string, # the SNR for adding the background noises
@@ -326,7 +339,7 @@ def CreateReverberatedCopy(input_dir,
     foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':'))
     background_snr_array = map(lambda x: float(x), background_snr_string.split(':'))
 
-    CorruptWav(wav_scp, durations, output_dir, room_dict, point_noise_list, iso_noise_list, 
+    GenerateReverberatedWavScp(wav_scp, durations, output_dir, room_dict, pointsource_noise_list, iso_noise_list, 
                foreground_snr_array, background_snr_array, num_replicas, prefix, 
                speech_rvb_probability, isotropic_noise_addition_probability, 
                pointsource_noise_addition_probability, max_noises_per_minute)
@@ -391,7 +404,7 @@ def ParseRirList(rir_list_file):
 # This function divides the global RIR list into local lists
 # according to the room where the RIRs are generated
 # It returns the room dictionary indexed by the room id
-# Each element in the room dictionary contains a local RIR list 
+# Its values are objects with two attributes: a local RIR list
 # and the probability of the corresponding room
 def MakeRoomDict(rir_list):
     room_dict = {}
@@ -419,11 +432,11 @@ def ParseNoiseList(noise_list_file):
     noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
     noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
     noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise', choices = ["background", "foreground"])
-    noise_parser.add_argument('--rir-id', type=str, default=None, help='compulsary if isotropic, should not be specified if point-source')
+    noise_parser.add_argument('--rir-id', type=str, default=None, help='required if isotropic, should not be specified if point-source')
     noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.')
     noise_parser.add_argument('noise_file_location', type=str, help='noise file location')
 
-    point_noise_list = []
+    pointsource_noise_list = []
     iso_noise_list = []
     noise_lines = map(lambda x: x.strip(), open(noise_list_file))
     for line in noise_lines:
@@ -434,9 +447,9 @@ def ParseNoiseList(noise_list_file):
             else:
                 iso_noise_list.append(noise)
         else:
-            point_noise_list.append(noise)
+            pointsource_noise_list.append(noise)
 
-    return (SmoothProbabilityDistribution(point_noise_list),
+    return (SmoothProbabilityDistribution(pointsource_noise_list),
             SmoothProbabilityDistribution(iso_noise_list))
 
 
@@ -446,15 +459,15 @@ def Main():
     rir_list = ParseRirList(args.rir_list_file)
     noise_list = []
     if args.noise_list_file is not None:
-        point_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file)
-        print("Number of point-source noises is {0}".format(len(point_noise_list)))
+        pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file)
+        print("Number of point-source noises is {0}".format(len(pointsource_noise_list)))
         print("Number of isotropic noises is {0}".format(len(iso_noise_list)))
     room_dict = MakeRoomDict(rir_list)
 
     CreateReverberatedCopy(input_dir = args.input_dir,
                    output_dir = args.output_dir,
                    room_dict = room_dict,
-                   point_noise_list = point_noise_list,
+                   pointsource_noise_list = pointsource_noise_list,
                    iso_noise_list = iso_noise_list,
                    foreground_snr_string = args.foreground_snr_string,
                    background_snr_string = args.background_snr_string,

From 617982b2d490ea009cb60de63f34aee186ae99d7 Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Tue, 26 Jul 2016 05:44:21 -0400
Subject: [PATCH 11/14] Modify the aspire recipe to use the new
 reverberate_data_dir.py; fixing bugs in reverberate_data_dir.py; add
 aspire_prep_rir_noise_list.py for generating rir_list and noise_list for
 aspire

---
 .../aspire_prep_rir_noise_list.py             | 79 +++++++++++++++++++
 .../local/multi_condition/run_nnet2_common.sh | 29 ++++---
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 73 +++++++++++------
 src/feat/signal.cc                            | 12 +--
 4 files changed, 151 insertions(+), 42 deletions(-)
 create mode 100755 egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py

diff --git a/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py b/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
new file mode 100755
index 00000000000..9dd7a38d183
--- /dev/null
+++ b/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+# Copyright 2016  Tom Ko
+# Apache 2.0
+# script to generate rir_list and noise_list in aspire
+
+# we're using python 3.x style print but want it to work in python 2.x,
+from __future__ import print_function
+import argparse, glob, math, os, sys
+
+
+def GetArgs():
+    """Parse the command line of this script and return the argparse
+    namespace; it carries the two positional arguments as the attributes
+    input_dir and output_dir."""
+    parser = argparse.ArgumentParser(description="Prepare rir_list and noise_list for Aspire. "
+                                                 "Usage: aspire_prep_rir_noise_list.py <in-data-dir> <out-data-dir> "
+                                                 "E.g. aspire_prep_rir_noise_list.py "
+                                                 "data/impulses_noises data/impulses_noises/info",
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("input_dir", help="Input data directory")
+    parser.add_argument("output_dir", help="Output data directory")
+    print(' '.join(sys.argv))  # echo the invoking command line
+    return parser.parse_args()
+
+
+# This function generates the rir_list file for the aspire real RIRs.
+# Each database (RVB2014, RWCP, air) is given one room id; files whose
+# name contains "noise" are skipped.
+def GenerateRirListFile(input_dir, output_dir):
+  with open(output_dir + "/rir_list", 'w') as rir_list_file:
+    rir_id = 1
+    for room_id, db in enumerate(["RVB2014", "RWCP", "air"], 1):
+      # sort so that the ids assigned do not depend on the filesystem order
+      for rir in sorted(glob.glob(input_dir + "/{0}_*.wav".format(db))):
+        if "noise" in os.path.basename(rir):
+          continue
+        rir_list_file.write('--rir-id {0} --room-id {1} {2}\n'.format(
+            str(rir_id).zfill(5), str(room_id).zfill(3), rir))
+        rir_id += 1
+
+
+# This function generates the noise_list file from the aspire noise-rir pairs.
+# A noise file is assumed to be named <db>_<type>_noise_<pattern...>_<suffix>.wav; it is
+# linked to every RIR file matching <db>_<type>_rir_<pattern...>*.wav (any type for RWCP).
+def GenerateNoiseListFile(input_dir, output_dir):
+  # sort the glob results so that ids and linkage order are reproducible
+  noise_files = sorted(glob.glob(input_dir + "/*_type*_noise*.wav"))
+  with open(output_dir + "/noise_list", 'w') as noise_list_file:
+    for noise_id, noise_file in enumerate(noise_files, 1):
+      parts = os.path.basename(noise_file).split('_')
+      db_name = parts[0]
+      # RWCP noises are matched against the RIRs of every type
+      type_num = "type*" if db_name == "RWCP" else parts[1]
+      noise_pattern = '_'.join(parts[3:-1])
+      rir_glob = "/{0}_{1}_rir_{2}*.wav".format(db_name, type_num, noise_pattern)
+      fields = ["--noise-id {0}".format(str(noise_id).zfill(5)),
+                "--noise-type isotropic"]
+      for rir in sorted(glob.glob(input_dir + rir_glob)):
+        fields.append("--rir-linkage {0}".format(rir))
+      fields.append(noise_file)
+      noise_list_file.write(' '.join(fields) + "\n")
+
+
+def Main():
+  # read <in-data-dir> and <out-data-dir> from the command line
+  args = GetArgs()
+  in_dir, out_dir = args.input_dir, args.output_dir
+  if not os.path.exists(out_dir):
+    os.makedirs(out_dir)
+
+  # write <out-data-dir>/rir_list and <out-data-dir>/noise_list, the two list
+  # files generated for the new steps/data/reverberate_data_dir.py
+  GenerateRirListFile(in_dir, out_dir)
+  GenerateNoiseListFile(in_dir, out_dir)
+
+
+if __name__ == "__main__":
+    Main()
+
diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
index 5b6424a1d86..15bb922726c 100755
--- a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
+++ b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
@@ -7,6 +7,8 @@
 
 stage=1
 snrs="20:10:15:5:0"
+foreground_snrs="20:10:15:5:0"
+background_snrs="20:10:15:5:0"
 num_data_reps=3
 ali_dir=exp/
 db_string="'air' 'rwcp' 'rvb2014'" # RIR dbs to be used in the experiment
@@ -31,24 +33,29 @@ if [ $stage -le 1 ]; then
     --RIR-home $RIR_home \
     data/impulses_noises || exit 1;
     
+  # Generate the rir_list and noise_list for the reverberate_data_dir.py to corrupt the data
+  python local/multi_condition/aspire_prep_rir_noise_list.py data/impulses_noises data/impulses_noises/info
+
   # corrupt the fisher data to generate multi-condition data 
-  # for data_dir in train dev test; do
   for data_dir in train dev test; do
     if [ "$data_dir" == "train" ]; then
       num_reps=$num_data_reps
     else
       num_reps=1
     fi
-    reverb_data_dirs=
-    for i in `seq 1 $num_reps`; do
-      cur_dest_dir=" data/temp_${data_dir}_${i}" 
-      local/multi_condition/reverberate_data_dir.sh --random-seed $i \
-        --snrs "$snrs" --log-dir exp/make_corrupted_wav \
-        data/${data_dir}  data/impulses_noises $cur_dest_dir
-      reverb_data_dirs+=" $cur_dest_dir" 
-    done
-    utils/combine_data.sh --extra-files utt2uniq data/${data_dir}_rvb $reverb_data_dirs
-    rm -rf $reverb_data_dirs
+    python steps/data/reverberate_data_dir.py \
+      --prefix "rev" \
+      --rir-list-file data/impulses_noises/info/rir_list \
+      --noise-list-file data/impulses_noises/info/noise_list \
+      --foreground-snrs $foreground_snrs \
+      --background-snrs $background_snrs \
+      --speech-rvb-probability 1 \
+      --pointsource-noise-addition-probability 1 \
+      --isotropic-noise-addition-probability 1 \
+      --num-replications $num_reps \
+      --max-noises-per-minute 1 \
+      --random-seed 1 \
+      data/${data_dir} data/${data_dir}_rvb
   done
 
   # create the dev, test and eval sets from the aspire recipe
diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index f9a6617fe00..52b07e669b9 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -67,11 +67,11 @@ def CheckArgs(args):
 
     ## Check arguments.
     if not os.path.isfile(args.rir_list_file):
-        raise Exception(args.rir_list_file + "not found")
+        raise Exception(args.rir_list_file + " not found")
     
     if args.noise_list_file is not None:
         if not os.path.isfile(args.noise_list_file):
-            raise Exception(args.noise_list_file + "not found")
+            raise Exception(args.noise_list_file + " not found")
 
     if args.num_replicas > 1 and args.prefix is None:
         args.prefix = "rvb"
@@ -146,8 +146,10 @@ def WriteDictToFile(dict, file_name):
 def FilterIsotropicNoiseList(iso_noise_list, rir_id):
     filtered_list = []
     for noise in iso_noise_list:
-        if noise.rir_id == rir_id:
-            filtered_list.append(noise)
+        for id in noise.rir_linkage:
+            if id == rir_id:
+                filtered_list.append(noise)
+                break
 
     return filtered_list
 
@@ -161,7 +163,7 @@ def AddPointSourceNoise(noise_addition_descriptor,  # descriptor to store the in
                         speech_dur,  # duration of the recording
                         max_noises_recording  # Maximum number of point-source noises that can be added
                         ):
-    if len(pointsource_noise_list) > 0 and random.random() < pointsource_noise_addition_probability:
+    if len(pointsource_noise_list) > 0 and random.random() < pointsource_noise_addition_probability and max_noises_recording >= 1:
         for k in range(random.randint(1, max_noises_recording)):
             # pick the RIR to reverberate the point-source noise
             noise = PickItemWithProbability(pointsource_noise_list)
@@ -265,7 +267,7 @@ def GenerateReverberatedWavScp(wav_scp,  # a dictionary whose values are the Kal
     foreground_snrs = list_cyclic_iterator(foreground_snr_array)
     background_snrs = list_cyclic_iterator(background_snr_array)
     corrupted_wav_scp = {}
-    for i in range(num_replicas):
+    for i in range(1, num_replicas+1):
         keys = wav_scp.keys()
         keys.sort()
         for recording_id in keys:
@@ -303,7 +305,7 @@ def GenerateReverberatedWavScp(wav_scp,  # a dictionary whose values are the Kal
 def AddPrefixToFields(input_file, output_file, num_replicas, prefix, field = [0]):
     list = map(lambda x: x.strip(), open(input_file))
     f = open(output_file, "w")
-    for i in range(num_replicas):
+    for i in range(1, num_replicas+1):
         for line in list:
             if len(line) > 0 and line[0] != ';':
                 split1 = line.split()
@@ -361,18 +363,19 @@ def CreateReverberatedCopy(input_dir,
 
 # This function smooths the probability distribution in the list
 def SmoothProbabilityDistribution(list, smoothing_weight=0.3):
-    uniform_probability = 1 / float(len(list))
-    for item in list:
-        if item.probability is None:
-            item.probability = uniform_probability
-        else:
-            # smooth the probability
-            item.probability = (1 - smoothing_weight) * item.probability + smoothing_weight * uniform_probability
-
-    # Normalize the probability
-    sum_p = sum(item.probability for item in list)
-    for item in list:
-        item.probability = item.probability / sum_p
+    if len(list) > 0:
+      uniform_probability = 1 / float(len(list))
+      for item in list:
+          if item.probability is None:
+              item.probability = uniform_probability
+          else:
+              # smooth the probability
+              item.probability = (1 - smoothing_weight) * item.probability + smoothing_weight * uniform_probability
+
+      # Normalize the probability
+      sum_p = sum(item.probability for item in list)
+      for item in list:
+          item.probability = item.probability / sum_p
 
     return list
 
@@ -422,17 +425,28 @@ def MakeRoomDict(rir_list):
 
     return room_dict
 
+
+# This function check if the RIR IO string is listed in the input rir_list file
+# It returns the RIR id if the io string is found
+def ValidateRirIO(rir_io_str, rir_list):
+    for rir in rir_list:
+        if rir_io_str == rir.rir_file_location:
+            return rir.rir_id
+
+    return "Not found"
+
+
 # This function creates the point-source noise list 
 # and the isotropic noise list from the noise information file
 # Each noise item in the list contains the following attributes:
-# noise_id, noise_type, bg_fg_type, rir_id, probability, noise_file_location
+# noise_id, noise_type, bg_fg_type, rir_linkage, probability, noise_file_location
 # Please refer to the help messages in the parser for the meaning of these attributes
-def ParseNoiseList(noise_list_file):
+def ParseNoiseList(noise_list_file, rir_list):
     noise_parser = argparse.ArgumentParser()
     noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
     noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
     noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise', choices = ["background", "foreground"])
-    noise_parser.add_argument('--rir-id', type=str, default=None, help='required if isotropic, should not be specified if point-source')
+    noise_parser.add_argument('--rir-linkage', type=str, action='append', default=None, help='required if isotropic, should not be specified if point-source, this option can be repeatly added to define multiple noise-rir association, the rir linkage can either be a RIR id or a RIR file path')
     noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.')
     noise_parser.add_argument('noise_file_location', type=str, help='noise file location')
 
@@ -442,9 +456,18 @@ def ParseNoiseList(noise_list_file):
     for line in noise_lines:
         noise = noise_parser.parse_args(line.split())
         if noise.noise_type == "isotropic":
-            if noise.rir_id is None:
-                raise Exception("--rir-id must be specified if --noise-type is isotropic")
+            if noise.rir_linkage is None:
+                raise Exception("--rir-linkage must be specified if --noise-type is isotropic")
             else:
+                for r in range(0, len(noise.rir_linkage)):
+                    if not noise.rir_linkage[r].isdigit():
+                       # this is a RIR IO string, validate if it exist in the input rir_list and return the RIR id
+                       result = ValidateRirIO(noise.rir_linkage[r], rir_list)
+                       if result == "Not found":
+                           raise Exception("RIR {0} specified by isotropic noise {1} not found".format(noise.rir_linkage[r], noise.noise_id))
+                       else:
+                           noise.rir_linkage[r] = result
+                       
                 iso_noise_list.append(noise)
         else:
             pointsource_noise_list.append(noise)
@@ -459,7 +482,7 @@ def Main():
     rir_list = ParseRirList(args.rir_list_file)
     noise_list = []
     if args.noise_list_file is not None:
-        pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file)
+        pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file, rir_list)
         print("Number of point-source noises is {0}".format(len(pointsource_noise_list)))
         print("Number of isotropic noises is {0}".format(len(iso_noise_list)))
     room_dict = MakeRoomDict(rir_list)
diff --git a/src/feat/signal.cc b/src/feat/signal.cc
index 12a9a710092..a206d399804 100644
--- a/src/feat/signal.cc
+++ b/src/feat/signal.cc
@@ -35,7 +35,7 @@ void ConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal)
   int32 signal_length = signal->Dim();
   int32 filter_length = filter.Dim();
   int32 output_length = signal_length + filter_length - 1;
-  Vector<float> signal_padded(output_length);
+  Vector<BaseFloat> signal_padded(output_length);
   signal_padded.SetZero();
   for (int32 i = 0; i < signal_length; i++) {
     for (int32 j = 0; j < filter_length; j++) {
@@ -57,11 +57,11 @@ void FFTbasedConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat>
 
   SplitRadixRealFft<BaseFloat> srfft(fft_length);
 
-  Vector<float> filter_padded(fft_length);
+  Vector<BaseFloat> filter_padded(fft_length);
   filter_padded.Range(0, filter_length).CopyFromVec(filter);
   srfft.Compute(filter_padded.Data(), true);
 
-  Vector<float> signal_padded(fft_length);
+  Vector<BaseFloat> signal_padded(fft_length);
   signal_padded.Range(0, signal_length).CopyFromVec(*signal);
   srfft.Compute(signal_padded.Data(), true);
 
@@ -89,13 +89,13 @@ void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFl
   KALDI_VLOG(1) << "Block size is " << block_length;
   SplitRadixRealFft<BaseFloat> srfft(fft_length);
 
-  Vector<float> filter_padded(fft_length);
+  Vector<BaseFloat> filter_padded(fft_length);
   filter_padded.Range(0, filter_length).CopyFromVec(filter);
   srfft.Compute(filter_padded.Data(), true);
 
-  Vector<float> temp_pad(filter_length - 1);
+  Vector<BaseFloat> temp_pad(filter_length - 1);
   temp_pad.SetZero();
-  Vector<float> signal_block_padded(fft_length);
+  Vector<BaseFloat> signal_block_padded(fft_length);
 
   for (int32 po = 0; po < output_length; po += block_length) {
     // get a block of the signal

From 93a2295552fd59ccce36c4074c36653bbc2dc332 Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Wed, 27 Jul 2016 00:02:35 -0400
Subject: [PATCH 12/14] Changing isotropic noise linkage to a room instead of a
 particular rir; Support using string as room id

---
 .../aspire_prep_rir_noise_list.py             | 28 +++++---
 .../local/multi_condition/run_nnet2_common.sh |  2 +-
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 72 ++++++++++---------
 3 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py b/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
index 9dd7a38d183..c07eed60d10 100755
--- a/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
+++ b/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
@@ -23,19 +23,31 @@ def GetArgs():
     return args
 
 
-# This function generate the rir_list file for the aspire real RIR
+# This function generates the rir_list file for the real RIRs being used in ASpIRE experiments.
+# It assumes the availability of data/impulses_noises directory prepared by local/multi_condition/prepare_impulses_noises.sh
 def GenerateRirListFile(input_dir, output_dir):
   rir_list_file = open(output_dir + "/rir_list", 'w')
   rir_id = 1
-  room_id = 1
   for db in ["RVB2014", "RWCP", "air"]:
     rir_files = glob.glob(input_dir + "/{0}_*.wav".format(db))
+    rir_files.sort()
     for rir in rir_files:
       filename = rir.split('/')[-1]
       if "noise" not in filename:
-        rir_list_file.write('--rir-id {0} --room-id {1} {2}\n'.format(str(rir_id).zfill(5), str(room_id).zfill(3), rir))
+        parts = filename.split('_')
+        db_name = parts[0]
+        type_num = parts[1]
+        if db == "RVB2014":
+          noise_pattern = parts[3]
+        elif db == "RWCP" and len(parts) == 4:
+          noise_pattern = parts[3]
+        else:
+          noise_pattern = '_'.join(parts[3:len(parts)-1])
+
+        # We use the string as the room id
+        room_id = db_name + "_" + noise_pattern
+        rir_list_file.write('--rir-id {0} --room-id {1} {2}\n'.format(str(rir_id).zfill(5), room_id, rir))
         rir_id += 1
-    room_id += 1
   rir_list_file.close()
 
 
@@ -43,18 +55,16 @@ def GenerateRirListFile(input_dir, output_dir):
 def GenerateNoiseListFile(input_dir, output_dir):
   noise_list_file = open(output_dir + "/noise_list", 'w')
   noise_files = glob.glob(input_dir + "/*_type*_noise*.wav")
+  noise_files.sort()
   noise_id = 1
   for noise_file in noise_files:
     parts = noise_file.split('/')[-1].split('_')
     db_name = parts[0]
     type_num = parts[1]
     noise_pattern = '_'.join(parts[3:len(parts)-1])
-    if db_name == "RWCP":
-      type_num = "type*"
-    matched_rir_files = glob.glob(input_dir + "/{0}_{1}_rir_{2}*.wav".format(db_name, type_num, noise_pattern))
     noise_line = "--noise-id {0} --noise-type isotropic ".format(str(noise_id).zfill(5))
-    for rir in matched_rir_files:
-      noise_line += "--rir-linkage {0} ".format(rir)
+    room_id = db_name + "_" + noise_pattern
+    noise_line += "--room-linkage {0} ".format(room_id)
     noise_line += "{0}".format(noise_file)
     noise_list_file.write("{0}\n".format(noise_line))
     noise_id += 1
diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
index 15bb922726c..78942c053f3 100755
--- a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
+++ b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
@@ -6,7 +6,6 @@
 . cmd.sh
 
 stage=1
-snrs="20:10:15:5:0"
 foreground_snrs="20:10:15:5:0"
 background_snrs="20:10:15:5:0"
 num_data_reps=3
@@ -34,6 +33,7 @@ if [ $stage -le 1 ]; then
     data/impulses_noises || exit 1;
     
   # Generate the rir_list and noise_list for the reverberate_data_dir.py to corrupt the data
+  # this script just assumes air rwcp rvb2014 databases
   python local/multi_condition/aspire_prep_rir_noise_list.py data/impulses_noises data/impulses_noises/info
 
   # corrupt the fisher data to generate multi-condition data 
diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
index 52b07e669b9..8c25a8211ab 100755
--- a/egs/wsj/s5/steps/data/reverberate_data_dir.py
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -137,19 +137,33 @@ def WriteDictToFile(dict, file_name):
                 value = list(value)
             value.sort()
             value = ' '.join(str(value))
-        file.write('{0}\t{1}\n'.format(key, value))
+        file.write('{0} {1}\n'.format(key, value))
     file.close()
 
 
-# This function returns only the isotropic noises according to the specified RIR id
+# This function writes output_dir/utt2uniq, mapping each new id returned by GetNewId(utt_id, prefix, i) to the original utt_id from utt2spk
+def CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix):
+    corrupted_utt2uniq = {}
+    # Parse the utt2spk to get the utterance ids (only the keys are used below)
+    utt2spk = ParseFileToDict(input_dir + "/utt2spk", value_processor = lambda x: " ".join(x))
+    keys = utt2spk.keys()
+    keys.sort()
+    for i in range(1, num_replicas+1):
+        for utt_id in keys:
+            new_utt_id = GetNewId(utt_id, prefix, i)
+            corrupted_utt2uniq[new_utt_id] = utt_id
+
+    WriteDictToFile(corrupted_utt2uniq, output_dir + "/utt2uniq")
+
+
+# This function returns all the isotropic noises linked to the specified room
 # Please refer to ParseNoiseList() for the format of iso_noise_list
-def FilterIsotropicNoiseList(iso_noise_list, rir_id):
+def FilterIsotropicNoiseList(iso_noise_list, room_id):
     filtered_list = []
     for noise in iso_noise_list:
-        for id in noise.rir_linkage:
-            if id == rir_id:
-                filtered_list.append(noise)
-                break
+        if noise.room_linkage == room_id:
+            # no break: it would end the scan and drop other noises of this room
+            filtered_list.append(noise)
 
     return filtered_list
 
@@ -206,7 +220,7 @@ def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to
         # pick the RIR to reverberate the speech
         reverberate_opts += "--impulse-response={0} ".format(speech_rir.rir_file_location)
 
-    rir_iso_noise_list = FilterIsotropicNoiseList(iso_noise_list, speech_rir.rir_id)
+    rir_iso_noise_list = FilterIsotropicNoiseList(iso_noise_list, speech_rir.room_id)
     # Add the corresponding isotropic noise associated with the selected RIR
     if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability:
         isotropic_noise = PickItemWithProbability(rir_iso_noise_list)
@@ -267,9 +281,9 @@ def GenerateReverberatedWavScp(wav_scp,  # a dictionary whose values are the Kal
     foreground_snrs = list_cyclic_iterator(foreground_snr_array)
     background_snrs = list_cyclic_iterator(background_snr_array)
     corrupted_wav_scp = {}
+    keys = wav_scp.keys()
+    keys.sort()
     for i in range(1, num_replicas+1):
-        keys = wav_scp.keys()
-        keys.sort()
         for recording_id in keys:
             wav_original_pipe = wav_scp[recording_id]
             # check if it is really a pipe
@@ -350,6 +364,13 @@ def CreateReverberatedCopy(input_dir,
     data_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
                     .format(output_dir = output_dir))
 
+    if os.path.isfile(input_dir + "/utt2uniq"):
+        AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, prefix, field =[0])
+    else:
+        # Create the utt2uniq file
+        CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix)
+
+
     if os.path.isfile(input_dir + "/text"):
         AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, prefix, field =[0])
     if os.path.isfile(input_dir + "/segments"):
@@ -426,27 +447,17 @@ def MakeRoomDict(rir_list):
     return room_dict
 
 
-# This function check if the RIR IO string is listed in the input rir_list file
-# It returns the RIR id if the io string is found
-def ValidateRirIO(rir_io_str, rir_list):
-    for rir in rir_list:
-        if rir_io_str == rir.rir_file_location:
-            return rir.rir_id
-
-    return "Not found"
-
-
 # This function creates the point-source noise list 
 # and the isotropic noise list from the noise information file
 # Each noise item in the list contains the following attributes:
-# noise_id, noise_type, bg_fg_type, rir_linkage, probability, noise_file_location
+# noise_id, noise_type, bg_fg_type, room_linkage, probability, noise_file_location
 # Please refer to the help messages in the parser for the meaning of these attributes
-def ParseNoiseList(noise_list_file, rir_list):
+def ParseNoiseList(noise_list_file):
     noise_parser = argparse.ArgumentParser()
     noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
     noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
     noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise', choices = ["background", "foreground"])
-    noise_parser.add_argument('--rir-linkage', type=str, action='append', default=None, help='required if isotropic, should not be specified if point-source, this option can be repeatly added to define multiple noise-rir association, the rir linkage can either be a RIR id or a RIR file path')
+    noise_parser.add_argument('--room-linkage', type=str, default=None, help='required if isotropic, should not be specified if point-source.')
     noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.')
     noise_parser.add_argument('noise_file_location', type=str, help='noise file location')
 
@@ -456,18 +467,9 @@ def ParseNoiseList(noise_list_file, rir_list):
     for line in noise_lines:
         noise = noise_parser.parse_args(line.split())
         if noise.noise_type == "isotropic":
-            if noise.rir_linkage is None:
-                raise Exception("--rir-linkage must be specified if --noise-type is isotropic")
+            if noise.room_linkage is None:
+                raise Exception("--room-linkage must be specified if --noise-type is isotropic")
             else:
-                for r in range(0, len(noise.rir_linkage)):
-                    if not noise.rir_linkage[r].isdigit():
-                       # this is a RIR IO string, validate if it exist in the input rir_list and return the RIR id
-                       result = ValidateRirIO(noise.rir_linkage[r], rir_list)
-                       if result == "Not found":
-                           raise Exception("RIR {0} specified by isotropic noise {1} not found".format(noise.rir_linkage[r], noise.noise_id))
-                       else:
-                           noise.rir_linkage[r] = result
-                       
                 iso_noise_list.append(noise)
         else:
             pointsource_noise_list.append(noise)
@@ -482,7 +484,7 @@ def Main():
     rir_list = ParseRirList(args.rir_list_file)
     noise_list = []
     if args.noise_list_file is not None:
-        pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file, rir_list)
+        pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file)
         print("Number of point-source noises is {0}".format(len(pointsource_noise_list)))
         print("Number of isotropic noises is {0}".format(len(iso_noise_list)))
     room_dict = MakeRoomDict(rir_list)

From cbe576282f472c8337aeb4c56ef2601fb0d8a25f Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Wed, 27 Jul 2016 10:47:14 -0400
Subject: [PATCH 13/14] Change comments in wav-reverberate.cc

---
 src/featbin/wav-reverberate.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/featbin/wav-reverberate.cc b/src/featbin/wav-reverberate.cc
index 683b8be6177..c19bc21cd84 100644
--- a/src/featbin/wav-reverberate.cc
+++ b/src/featbin/wav-reverberate.cc
@@ -197,8 +197,9 @@ int main(int argc, char *argv[]) {
                 "If nonzero, it specified the duration (secs) of the output "
                 "signal. If the duration t is less than the length of the "
                 "input signal, the first t secs of the signal is trimed, "
-                "otherwise, the signal will be repeated to"
-                "fulfill the duration specified.");
+                "otherwise, the signal will be repeated to "
+                "fulfill the duration specified. This option is useful for "
+                "extending the length of isotropic noises.");
     po.Register("volume", &volume,
                 "If nonzero, a scaling factor on the signal that is applied "
                 "after reverberating and possibly adding noise. "

From d34f5971e729919a6b406b41d977a89d31a0b087 Mon Sep 17 00:00:00 2001
From: Tom Ko <tomkocse@gmail.com>
Date: Sun, 17 Apr 2016 12:27:15 -0400
Subject: [PATCH 14/14] A new steps/data/reverberate_data_dir.py script

update function names; split snrs to background and foreground; user specified random seed; always handle isotropic noise as background noise

Pick the RIRs and noises according to assigned probabilities.

Modify wav-reverberate.cc according to the new steps/data/reverberate_data_dir.py

Change the functions in signal.cc to extend the length of the convolved signal, the correct length should be original signal length + rir length - 1; add the shift option to wav-reverberate.cc

Adding more comments and remove duplicate function in reverberate_data_dir.py

Change option --max-noises-added to --max-noises-per-minute

Adding data_lib.py; adding more comments, splitting large function in reverberate_data_dir.py

adding AddPointSourceNoise()

Fixing spelling mistake and modifying comments

Modify the aspire recipe to use the new reverberate_data_dir.py; fixing bugs in reverberate_data_dir.py; add aspire_prep_rir_noise_list.py for generating rir_list and noise_list for aspire

Changing isotropic noise linkage to a room instead of a particular rir; Support using string as room id

Change comments in wav-reverberate.cc
---
 .../aspire_prep_rir_noise_list.py             |  89 +++
 .../local/multi_condition/run_nnet2_common.sh |  31 +-
 .../steps/data/data_dir_manipulation_lib.py   |  18 +
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 508 ++++++++++++++++++
 src/feat/signal.cc                            |  28 +-
 src/feat/signal.h                             |   7 +
 src/featbin/wav-reverberate.cc                | 260 ++++++---
 7 files changed, 843 insertions(+), 98 deletions(-)
 create mode 100755 egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
 create mode 100644 egs/wsj/s5/steps/data/data_dir_manipulation_lib.py
 create mode 100755 egs/wsj/s5/steps/data/reverberate_data_dir.py

diff --git a/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py b/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
new file mode 100755
index 00000000000..c07eed60d10
--- /dev/null
+++ b/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# Copyright 2016  Tom Ko
+# Apache 2.0
+# script to generate rir_list and noise_list in aspire
+
+# we're using python 3.x style print but want it to work in python 2.x,
+from __future__ import print_function
+import argparse, glob, math, os, sys
+
+
+def GetArgs():
+    """Parse the two positional arguments (input_dir, output_dir) and echo the command line."""
+    parser = argparse.ArgumentParser(description="Prepare rir_list and noise_list for Aspire. "
+                                                 "Usage: aspire_prep_rir_noise_list.py <in-data-dir> <out-data-dir> "
+                                                 "E.g. aspire_prep_rir_noise_list.py "
+                                                 "data/impulses_noises data/impulses_noises/info",
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("input_dir", help="Input data directory")
+    parser.add_argument("output_dir", help="Output data directory")
+    print(' '.join(sys.argv))
+    args = parser.parse_args()
+
+    return args
+
+
+# This function generates the rir_list file for the real RIRs being used in ASpIRE experiments.
+# It assumes the availability of data/impulses_noises directory prepared by local/multi_condition/prepare_impulses_noises.sh
+# Each line written is "--rir-id <zero-padded counter> --room-id <db>_<pattern> <path>".
+def GenerateRirListFile(input_dir, output_dir):
+  rir_id = 1
+  # the with-statement closes rir_list even if a file name cannot be parsed
+  with open(output_dir + "/rir_list", 'w') as rir_list_file:
+    for db in ["RVB2014", "RWCP", "air"]:
+      rir_files = glob.glob(input_dir + "/{0}_*.wav".format(db))
+      rir_files.sort()
+      for rir in rir_files:
+        filename = rir.split('/')[-1]
+        if "noise" not in filename:
+          parts = filename.split('_')
+          db_name = parts[0]
+          if db == "RVB2014":
+            noise_pattern = parts[3]
+          elif db == "RWCP" and len(parts) == 4:
+            noise_pattern = parts[3]
+          else:
+            noise_pattern = '_'.join(parts[3:len(parts)-1])
+
+          # We use the string as the room id
+          room_id = db_name + "_" + noise_pattern
+          rir_list_file.write('--rir-id {0} --room-id {1} {2}\n'.format(str(rir_id).zfill(5), room_id, rir))
+          rir_id += 1
+
+
+# This function generates the noise_list file from the aspire noise-rir pairs.
+# Every noise is written as isotropic and linked to the room id <db>_<pattern>
+# taken from its file name (fields 0 and 3..n-2 of the '_'-separated name).
+def GenerateNoiseListFile(input_dir, output_dir):
+  noise_files = sorted(glob.glob(input_dir + "/*_type*_noise*.wav"))
+  noise_id = 1
+  # the with-statement closes noise_list even if writing fails part-way
+  with open(output_dir + "/noise_list", 'w') as noise_list_file:
+    for noise_file in noise_files:
+      parts = noise_file.split('/')[-1].split('_')
+      db_name = parts[0]
+      noise_pattern = '_'.join(parts[3:len(parts)-1])
+      room_id = db_name + "_" + noise_pattern
+      noise_line = "--noise-id {0} --noise-type isotropic ".format(str(noise_id).zfill(5))
+      noise_line += "--room-linkage {0} ".format(room_id)
+      noise_line += "{0}".format(noise_file)
+      noise_list_file.write("{0}\n".format(noise_line))
+      noise_id += 1
+
+
+def Main():
+  """Create output_dir if it is missing, then write rir_list and noise_list into it."""
+  args = GetArgs()
+  in_dir, out_dir = args.input_dir, args.output_dir
+  if not os.path.exists(out_dir):
+    os.makedirs(out_dir)
+  # rir_list for the new steps/data/reverberate_data_dir.py
+  GenerateRirListFile(in_dir, out_dir)
+
+  # noise_list for the new steps/data/reverberate_data_dir.py
+  GenerateNoiseListFile(in_dir, out_dir)
+
+
+if __name__ == "__main__":
+    Main()
+
diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
index 5b6424a1d86..78942c053f3 100755
--- a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
+++ b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
@@ -6,7 +6,8 @@
 . cmd.sh
 
 stage=1
-snrs="20:10:15:5:0"
+foreground_snrs="20:10:15:5:0"
+background_snrs="20:10:15:5:0"
 num_data_reps=3
 ali_dir=exp/
 db_string="'air' 'rwcp' 'rvb2014'" # RIR dbs to be used in the experiment
@@ -31,24 +32,30 @@ if [ $stage -le 1 ]; then
     --RIR-home $RIR_home \
     data/impulses_noises || exit 1;
     
+  # Generate the rir_list and noise_list for the reverberate_data_dir.py to corrupt the data
+  # this script just assumes air rwcp rvb2014 databases
+  python local/multi_condition/aspire_prep_rir_noise_list.py data/impulses_noises data/impulses_noises/info
+
   # corrupt the fisher data to generate multi-condition data 
-  # for data_dir in train dev test; do
   for data_dir in train dev test; do
     if [ "$data_dir" == "train" ]; then
       num_reps=$num_data_reps
     else
       num_reps=1
     fi
-    reverb_data_dirs=
-    for i in `seq 1 $num_reps`; do
-      cur_dest_dir=" data/temp_${data_dir}_${i}" 
-      local/multi_condition/reverberate_data_dir.sh --random-seed $i \
-        --snrs "$snrs" --log-dir exp/make_corrupted_wav \
-        data/${data_dir}  data/impulses_noises $cur_dest_dir
-      reverb_data_dirs+=" $cur_dest_dir" 
-    done
-    utils/combine_data.sh --extra-files utt2uniq data/${data_dir}_rvb $reverb_data_dirs
-    rm -rf $reverb_data_dirs
+    python steps/data/reverberate_data_dir.py \
+      --prefix "rev" \
+      --rir-list-file data/impulses_noises/info/rir_list \
+      --noise-list-file data/impulses_noises/info/noise_list \
+      --foreground-snrs $foreground_snrs \
+      --background-snrs $background_snrs \
+      --speech-rvb-probability 1 \
+      --pointsource-noise-addition-probability 1 \
+      --isotropic-noise-addition-probability 1 \
+      --num-replications $num_reps \
+      --max-noises-per-minute 1 \
+      --random-seed 1 \
+      data/${data_dir} data/${data_dir}_rvb
   done
 
   # create the dev, test and eval sets from the aspire recipe
diff --git a/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py b/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py
new file mode 100644
index 00000000000..1f7253d4891
--- /dev/null
+++ b/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py
@@ -0,0 +1,18 @@
+import subprocess
+
+def RunKaldiCommand(command, wait = True):
+    """ Runs commands frequently seen in Kaldi scripts. These are usually a
+        sequence of commands connected by pipes, so we use shell=True.
+        If wait is true, returns (stdout, stderr) and raises Exception on a
+        non-zero exit status; otherwise returns the Popen object at once. """
+    p = subprocess.Popen(command, shell = True,
+                         stdout = subprocess.PIPE,
+                         stderr = subprocess.PIPE)
+    if not wait:
+        return p
+    [stdout, stderr] = p.communicate()
+    # compare by value: 'is not 0' tests object identity, not equality
+    if p.returncode != 0:
+        raise Exception("There was an error while running the command {0}\n".format(command)+"-"*10+"\n"+stderr)
+    return stdout, stderr
+
diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
new file mode 100755
index 00000000000..8c25a8211ab
--- /dev/null
+++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -0,0 +1,508 @@
+#!/usr/bin/env python
+# Copyright 2016  Tom Ko
+# Apache 2.0
+# script to generate reverberated data
+
+# we're using python 3.x style print but want it to work in python 2.x,
+from __future__ import print_function
+import argparse, glob, math, os, random, sys, warnings, copy, imp, ast
+
+data_lib = imp.load_source('dml', 'steps/data/data_dir_manipulation_lib.py')
+
+def GetArgs():
+    """Parse and validate the command line; returns the argparse namespace
+       after CheckArgs(). The full command line is echoed to stdout first."""
+    # we add required arguments as named arguments for readability
+    parser = argparse.ArgumentParser(description="Reverberate the data directory with an option "
+                                                 "to add isotropic and point source noises. "
+                                                 "Usage: reverberate_data_dir.py [options...] <in-data-dir> <out-data-dir> "
+                                                 "E.g. reverberate_data_dir.py --rir-list-file rir_list "
+                                                 "--foreground-snrs 20:10:15:5:0 --background-snrs 20:10:15:5:0 "
+                                                 "--noise-list-file noise_list --speech-rvb-probability 1 --num-replications 2 "
+                                                 "--random-seed 1 data/train data/train_rvb",
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("--rir-list-file", type=str, required = True,
+                        help="RIR information file, the format of the file is "
+                        "--rir-id <string,required> --room-id <string,required> "
+                        "--receiver-position-id <string,optional> --source-position-id <string,optional> "
+                        "--rt60 <float,optional> --drr <float, optional> <location(support Kaldi IO strings)> "
+                        "E.g. --rir-id 00001 --room-id 001 --receiver-position-id 001 --source-position-id 00001 "
+                        "--rt60 0.58 --drr -4.885 data/impulses/Room001-00001.wav")
+    parser.add_argument("--noise-list-file", type=str, default = None,
+                        help="Noise information file, the format of the file is "
+                        "--noise-id <string,required> --noise-type <choices = {isotropic, point-source},required> "
+                        "--bg-fg-type <choices = {background, foreground}, default=background> "
+                        "--room-linkage <str, specifies the room associated with the noise file. Required if isotropic "
+                        "as it links this noise file to a specific room, should not be specified if point-source> "
+                        "<location=(support Kaldi IO strings)> "
+                        "E.g. --noise-id 001 --noise-type isotropic --room-linkage 001 iso_noise.wav")
+    parser.add_argument("--num-replications", type=int, dest = "num_replicas", default = 1,
+                        help="Number of replicates to generate for the data")
+    parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", default = '20:10:0', help='When foreground noises are being added the script will iterate through these SNRs.')
+    parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0', help='When background noises are being added the script will iterate through these SNRs.')
+    parser.add_argument('--prefix', type=str, default = None, help='This prefix will be modified for each reverberated copy, by adding additional affixes.')
+    parser.add_argument("--speech-rvb-probability", type=float, default = 1.0,
+                        help="Probability of reverberating a speech signal, e.g. 0 <= p <= 1")
+    parser.add_argument("--pointsource-noise-addition-probability", type=float, default = 1.0,
+                        help="Probability of adding point-source noises, e.g. 0 <= p <= 1")
+    parser.add_argument("--isotropic-noise-addition-probability", type=float, default = 1.0,
+                        help="Probability of adding isotropic noises, e.g. 0 <= p <= 1")
+    parser.add_argument("--max-noises-per-minute", type=int, default = 2,
+                        help="This controls the maximum number of point-source noises that could be added to a recording according to its duration")
+    parser.add_argument('--random-seed', type=int, default=0, help='seed to be used in the randomization of impulses and noises')
+    parser.add_argument("input_dir", help="Input data directory")
+    parser.add_argument("output_dir", help="Output data directory")
+
+    print(' '.join(sys.argv))
+
+    args = parser.parse_args()
+    args = CheckArgs(args)
+
+    return args
+
+def CheckArgs(args):
+    """Validate the parsed options, create the output directory if missing and
+       return args (with --prefix defaulted to 'rvb' when replicating)."""
+    out_dir = args.output_dir
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
+    # the RIR list is mandatory, the noise list only has to exist when given
+    for list_file, is_given in [(args.rir_list_file, True),
+                                (args.noise_list_file, args.noise_list_file is not None)]:
+        if is_given and not os.path.isfile(list_file):
+            raise Exception(list_file + " not found")
+    # several replicas need distinct ids, so a prefix is forced in that case
+    needs_default_prefix = args.prefix is None and args.num_replicas > 1
+    if needs_default_prefix:
+        args.prefix = "rvb"
+        warnings.warn("--prefix is set to 'rvb' as --num-replications is larger than 1.")
+    return args
+
+
+class list_cyclic_iterator:
+    """Endless iterator: shuffles the given list in place once, then cycles over it."""
+    def __init__(self, list):
+        random.shuffle(list)
+        self.list, self.list_index = list, 0
+
+    def next(self):
+        picked = self.list[self.list_index]
+        self.list_index = 0 if self.list_index + 1 == len(self.list) else self.list_index + 1
+        return picked
+
+
+# This function picks an item from the collection according to the associated probability distribution.
+# The probability estimate of each item in the collection is stored in the "probability" field of
+# the particular item. x : a collection (list or dictionary) where the values contain a field called probability
+def PickItemWithProbability(x):
+    plist = x
+    if isinstance(x, dict):  # sorted-key order (not set() order) keeps the pick reproducible for a given seed
+        plist = []
+        for key in sorted(x):
+            if x[key] not in plist: plist.append(x[key])  # count a repeated object once, as set() did
+    p = random.uniform(0, sum(item.probability for item in plist))
+    accumulate_p = 0
+    for item in plist:
+        if accumulate_p + item.probability >= p:
+            return item
+        accumulate_p += item.probability
+    assert False, "Shouldn't get here as the accumulated probability should always equal to 1"
+
+
+# This function parses a file and packs the data into a dictionary
+# It is useful for parsing file like wav.scp, utt2spk, text...etc
+# value_processor maps the list of fields after the key to the stored value (default: the first field)
+def ParseFileToDict(file, assert2fields = False, value_processor = None):
+    if value_processor is None:
+        value_processor = lambda x: x[0]
+    parsed = {}
+    with open(file, 'r') as f:  # the handle is closed even if a line fails to parse
+        for line in f:
+            parts = line.split()
+            if not parts: continue  # a blank line has no key; indexing parts[0] would raise IndexError
+            assert not assert2fields or len(parts) == 2
+            parsed[parts[0]] = value_processor(parts[1:])
+    return parsed
+
+# This function creates a file and writes the content of a dictionary into it,
+# one "<key> <value>" line per entry in sorted key order;
+# list/tuple values are sorted and written as space-separated fields
+def WriteDictToFile(dict, file_name):
+    with open(file_name, 'w') as file:  # the handle is closed even if a write fails
+        # sorted() instead of keys().sort(): dict.keys() is not a list on python 3
+        for key in sorted(dict.keys()):
+            value = dict[key]
+            if isinstance(value, (list, tuple)):
+                # join the str() of every element; ' '.join(str(value)) put a space
+                # between the characters of the repr of the whole list. sorted()
+                # returns a copy, so the caller's list is no longer reordered.
+                value = ' '.join(str(v) for v in sorted(value))
+            file.write('{0} {1}\n'.format(key, value))
+
+
+# This function creates the utt2uniq file from the utterance id in utt2spk file
+def CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix):
+    corrupted_utt2uniq = {}
+    # Parse the utt2spk to get the utterance id
+    utt2spk = ParseFileToDict(input_dir + "/utt2spk", value_processor = lambda x: " ".join(x))
+    # sorted() instead of keys().sort(): dict.keys() is not a list on python 3
+    keys = sorted(utt2spk.keys())
+    for i in range(1, num_replicas+1):
+        for utt_id in keys:
+            new_utt_id = GetNewId(utt_id, prefix, i)
+            corrupted_utt2uniq[new_utt_id] = utt_id
+
+    WriteDictToFile(corrupted_utt2uniq, output_dir + "/utt2uniq")
+
+
+# This function returns only the isotropic noises according to the specified room
+# Please refer to ParseNoiseList() for the format of iso_noise_list
+def FilterIsotropicNoiseList(iso_noise_list, room_id):
+    # Keep every noise linked to the room, not just the first match: the caller
+    # picks one of them at random according to their probabilities, which is
+    # pointless if the list is cut after a single entry.
+    filtered_list = [noise for noise in iso_noise_list
+                     if noise.room_linkage == room_id]
+
+    return filtered_list
+
+
+def AddPointSourceNoise(noise_addition_descriptor,  # descriptor to store the information of the noise added
+                        room,  # the room selected
+                        pointsource_noise_list, # the point source noise list
+                        pointsource_noise_addition_probability, # Probability of adding point-source noises
+                        foreground_snrs, # the SNR for adding the foreground noises
+                        background_snrs, # the SNR for adding the background noises
+                        speech_dur,  # duration of the recording
+                        max_noises_recording  # Maximum number of point-source noises that can be added
+                        ):
+    """Appends between 1 and max_noises_recording reverberated point-source noises to the descriptor and returns it."""
+    # ">= 1": a recording allowed exactly one noise must still get it; int(): the limit may be a float (math.floor on python 2)
+    if len(pointsource_noise_list) > 0 and random.random() < pointsource_noise_addition_probability and max_noises_recording >= 1:
+        for k in range(random.randint(1, int(max_noises_recording))):
+            noise = PickItemWithProbability(pointsource_noise_list)
+            noise_rir = PickItemWithProbability(room.rir_list)  # the RIR to reverberate the point-source noise
+            # If it is a background noise, the noise will be extended and be added to the whole speech
+            # if it is a foreground noise, the noise will not be extended and is added at a random time of the speech
+            if noise.bg_fg_type == "background":
+                noise_addition_descriptor['noise_io'].append("wav-reverberate --duration={2} --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location, speech_dur))
+                noise_addition_descriptor['start_times'].append(0)
+                noise_addition_descriptor['snrs'].append(background_snrs.next())
+            else:
+                noise_addition_descriptor['noise_io'].append("wav-reverberate --impulse-response={1} {0} - |".format(noise.noise_file_location, noise_rir.rir_file_location))
+                noise_addition_descriptor['start_times'].append(round(random.random() * speech_dur, 2))
+                noise_addition_descriptor['snrs'].append(foreground_snrs.next())
+    return noise_addition_descriptor
+
+
+def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+                            pointsource_noise_list, # the point source noise list
+                            iso_noise_list, # the isotropic noise list
+                            foreground_snrs, # the SNR for adding the foreground noises
+                            background_snrs, # the SNR for adding the background noises
+                            speech_rvb_probability, # Probability of reverberating a speech signal
+                            isotropic_noise_addition_probability, # Probability of adding isotropic noises
+                            pointsource_noise_addition_probability, # Probability of adding point-source noises
+                            speech_dur,  # duration of the recording
+                            max_noises_recording  # Maximum number of point-source noises that can be added
+                            ):
+    """Returns the wav-reverberate option string (possibly empty) drawn at random for one recording."""
+    reverberate_opts = ""
+    noise_addition_descriptor = {'noise_io': [],
+                                 'start_times': [],
+                                 'snrs': []}
+    # Randomly select the room; the room probability is the sum of the probabilities of the RIRs recorded in it
+    room = PickItemWithProbability(room_dict)
+    # Randomly select the RIR in the room
+    speech_rir = PickItemWithProbability(room.rir_list)
+    if random.random() < speech_rvb_probability:
+        # pick the RIR to reverberate the speech
+        reverberate_opts += "--impulse-response={0} ".format(speech_rir.rir_file_location)
+    # the RIR is drawn even when it is not applied: its room_id selects the isotropic noises below
+    rir_iso_noise_list = FilterIsotropicNoiseList(iso_noise_list, speech_rir.room_id)
+    # Add the corresponding isotropic noise associated with the selected RIR
+    if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability:
+        isotropic_noise = PickItemWithProbability(rir_iso_noise_list)
+        # extend the isotropic noise to the length of the speech waveform
+        noise_addition_descriptor['noise_io'].append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_file_location, speech_dur))
+        noise_addition_descriptor['start_times'].append(0)
+        noise_addition_descriptor['snrs'].append(background_snrs.next())
+    # point-source noises are reverberated with RIRs of the same room and appended to the same descriptor
+    noise_addition_descriptor = AddPointSourceNoise(noise_addition_descriptor,  # descriptor to store the information of the noise added
+                                                    room,  # the room selected
+                                                    pointsource_noise_list, # the point source noise list
+                                                    pointsource_noise_addition_probability, # Probability of adding point-source noises
+                                                    foreground_snrs, # the SNR for adding the foreground noises
+                                                    background_snrs, # the SNR for adding the background noises
+                                                    speech_dur,  # duration of the recording
+                                                    max_noises_recording  # Maximum number of point-source noises that can be added
+                                                    )
+    # the three descriptor lists are parallel: one entry per additive signal
+    assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['start_times'])
+    assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['snrs'])
+    if len(noise_addition_descriptor['noise_io']) > 0:
+        reverberate_opts += "--additive-signals='{0}' ".format(','.join(noise_addition_descriptor['noise_io']))
+        reverberate_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['start_times'])))
+        reverberate_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['snrs'])))
+
+    return reverberate_opts
+
+# Builds the id of one replicated copy of an utterance/recording/speaker:
+# "<prefix><copy>_<id>" when a prefix is given, the unchanged id otherwise
+def GetNewId(id, prefix=None, copy=0):
+    # without a prefix there is nothing to prepend
+    if prefix is None:
+        return id
+
+    # e.g. GetNewId("utt1", "rvb", 2) gives "rvb2_utt1"
+    return prefix + str(copy) + "_" + id
+    
+
+# This is the main function to generate pipeline command for the corruption
+# The generic command of wav-reverberate will be like:
+# wav-reverberate --duration=t --impulse-response=rir.wav
+# --additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' --start-times='s1,s2' input.wav output.wav
+def GenerateReverberatedWavScp(wav_scp,  # a dictionary whose values are the Kaldi-IO strings of the speech recordings
+               durations, # a dictionary whose values are the duration (in sec) of the speech recordings
+               output_dir, # output directory to write the corrupted wav.scp
+               room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+               pointsource_noise_list, # the point source noise list
+               iso_noise_list, # the isotropic noise list
+               foreground_snr_array, # the SNR for adding the foreground noises
+               background_snr_array, # the SNR for adding the background noises
+               num_replicas, # Number of replicate to generated for the data
+               prefix, # prefix for the id of the corrupted utterances
+               speech_rvb_probability, # Probability of reverberating a speech signal
+               isotropic_noise_addition_probability, # Probability of adding isotropic noises
+               pointsource_noise_addition_probability, # Probability of adding point-source noises
+               max_noises_per_minute # maximum number of point-source noises that can be added to a recording according to its duration
+               ):
+    foreground_snrs = list_cyclic_iterator(foreground_snr_array)
+    background_snrs = list_cyclic_iterator(background_snr_array)
+    corrupted_wav_scp = {}
+    # sorted() instead of keys().sort(): dict.keys() is not a list on python 3
+    keys = sorted(wav_scp.keys())
+    for i in range(1, num_replicas+1):
+        for recording_id in keys:
+            wav_original_pipe = wav_scp[recording_id]
+            # a single field is a plain file name rather than a pipe, so turn it into one
+            if len(wav_original_pipe.split()) == 1:
+                wav_original_pipe = "cat {0} |".format(wav_original_pipe)
+            speech_dur = durations[recording_id]
+            max_noises_recording = int(math.floor(max_noises_per_minute * speech_dur / 60))  # int: math.floor gives a float on python 2
+
+            reverberate_opts = GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+                                                         pointsource_noise_list, # the point source noise list
+                                                         iso_noise_list, # the isotropic noise list
+                                                         foreground_snrs, # the SNR for adding the foreground noises
+                                                         background_snrs, # the SNR for adding the background noises
+                                                         speech_rvb_probability, # Probability of reverberating a speech signal
+                                                         isotropic_noise_addition_probability, # Probability of adding isotropic noises
+                                                         pointsource_noise_addition_probability, # Probability of adding point-source noises
+                                                         speech_dur,  # duration of the recording
+                                                         max_noises_recording  # Maximum number of point-source noises that can be added
+                                                         )
+
+            if reverberate_opts == "":
+                wav_corrupted_pipe = "{0}".format(wav_original_pipe)
+            else:
+                wav_corrupted_pipe = "{0} wav-reverberate {1} - - |".format(wav_original_pipe, reverberate_opts)
+
+            new_recording_id = GetNewId(recording_id, prefix, i)
+            corrupted_wav_scp[new_recording_id] = wav_corrupted_pipe
+
+    WriteDictToFile(corrupted_wav_scp, output_dir + "/wav.scp")
+
+
+# This function replicates the entries in files like segments, utt2spk, text
+def AddPrefixToFields(input_file, output_file, num_replicas, prefix, field = [0]):
+    with open(input_file) as fin:
+        lines = [x.strip() for x in fin]  # a real list: a lazy map() is exhausted after the first replica on python 3
+    with open(output_file, "w") as f:
+        for i in range(1, num_replicas+1):
+            for line in lines:
+                if len(line) > 0 and line[0] != ';':
+                    split1 = line.split()
+                    for j in field:
+                        split1[j] = GetNewId(split1[j], prefix, i)
+                    print(" ".join(split1), file=f)
+                else:
+                    print(line, file=f)
+
+
+# This function creates multiple copies of the necessary files, e.g. utt2spk, wav.scp ...
+def CreateReverberatedCopy(input_dir,
+                           output_dir,
+                           room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+                           pointsource_noise_list, # the point source noise list
+                           iso_noise_list, # the isotropic noise list
+                           foreground_snr_string, # the SNR for adding the foreground noises
+                           background_snr_string, # the SNR for adding the background noises
+                           num_replicas, # Number of replicate to generated for the data
+                           prefix, # prefix for the id of the corrupted utterances
+                           speech_rvb_probability, # Probability of reverberating a speech signal
+                           isotropic_noise_addition_probability, # Probability of adding isotropic noises
+                           pointsource_noise_addition_probability, # Probability of adding point-source noises
+                           max_noises_per_minute  # maximum number of point-source noises that can be added to a recording according to its duration
+                           ):
+    """Writes the reverberated data directory to output_dir; reco2dur is created in input_dir when missing."""
+    if not os.path.isfile(input_dir + "/reco2dur"):
+        print("Getting the duration of the recordings...")
+        data_lib.RunKaldiCommand("wav-to-duration --read-entire-file=true scp:{0}/wav.scp ark,t:{0}/reco2dur".format(input_dir))
+    durations = ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0]))
+    wav_scp = ParseFileToDict(input_dir + "/wav.scp", value_processor = lambda x: " ".join(x))
+    foreground_snr_array = [float(x) for x in foreground_snr_string.split(':')]  # real lists: they get shuffled and indexed later,
+    background_snr_array = [float(x) for x in background_snr_string.split(':')]  # which a python 3 map object does not support
+
+    GenerateReverberatedWavScp(wav_scp, durations, output_dir, room_dict, pointsource_noise_list, iso_noise_list,
+               foreground_snr_array, background_snr_array, num_replicas, prefix,
+               speech_rvb_probability, isotropic_noise_addition_probability,
+               pointsource_noise_addition_probability, max_noises_per_minute)
+
+    AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, prefix, field = [0,1])
+    data_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
+                    .format(output_dir = output_dir))
+
+    if os.path.isfile(input_dir + "/utt2uniq"):
+        AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, prefix, field =[0])
+    else:
+        # Create the utt2uniq file
+        CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix)
+
+    # replicate the optional files when the input directory has them
+    if os.path.isfile(input_dir + "/text"):
+        AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, prefix, field =[0])
+    if os.path.isfile(input_dir + "/segments"):
+        AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replicas, prefix, field = [0,1])
+    if os.path.isfile(input_dir + "/reco2file_and_channel"):
+        AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, prefix, field = [0,1])
+
+    data_lib.RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}"
+                    .format(output_dir = output_dir))
+
+
+# Interpolates every item's "probability" with the uniform distribution and renormalizes the list in place
+def SmoothProbabilityDistribution(list, smoothing_weight=0.3):
+    num_items = len(list)
+    if num_items == 0:
+        return list  # nothing to smooth
+    uniform_probability = 1 / float(num_items)
+    for entry in list:
+        if entry.probability is not None:
+            # blend the given probability with the uniform one
+            entry.probability = (1 - smoothing_weight) * entry.probability + smoothing_weight * uniform_probability
+        else:
+            entry.probability = uniform_probability
+    # rescale so that the probabilities sum to one
+    normalizer = sum(entry.probability for entry in list)
+    for entry in list:
+        entry.probability = entry.probability / normalizer
+    return list
+
+# This function creates the RIR list
+# Each RIR item in the list contains the following attributes:
+# rir_id, room_id, receiver_position_id, source_position_id, rt60, drr, probability, rir_file_location, iso_noise_list
+# Please refer to the help messages in the parser for the meaning of these attributes
+def ParseRirList(rir_list_file):
+    rir_parser = argparse.ArgumentParser()
+    rir_parser.add_argument('--rir-id', type=str, required=True, help='This id is unique for each RIR and the noise may associate with a particular RIR by refering to this id')
+    rir_parser.add_argument('--room-id', type=str, required=True, help='This is the room that where the RIR is generated')
+    rir_parser.add_argument('--receiver-position-id', type=str, default=None, help='receiver position id')
+    rir_parser.add_argument('--source-position-id', type=str, default=None, help='source position id')
+    rir_parser.add_argument('--rt60', type=float, default=None, help='RT60 is the time required for reflections of a direct sound to decay 60 dB.')
+    rir_parser.add_argument('--drr', type=float, default=None, help='Direct-to-reverberant-ratio of the impulse.')
+    rir_parser.add_argument('--probability', type=float, default=None, help='probability of the impulse.')
+    rir_parser.add_argument('rir_file_location', type=str, help='rir file location')
+    rir_list = []
+    with open(rir_list_file) as f:  # the handle is closed once the list has been read
+        for line in f:
+            if not line.strip(): continue  # a blank line would make parse_args() abort with a usage error
+            rir = rir_parser.parse_args(line.split())
+            setattr(rir, "iso_noise_list", [])
+            rir_list.append(rir)
+
+    return SmoothProbabilityDistribution(rir_list)
+
+
+# Groups the global RIR list by the room in which the RIRs were generated.
+# The result is a dictionary indexed by the room id; each value is an
+# attribute holder with two fields:
+#   rir_list    : the RIRs of that room, in their original order
+#   probability : the sum of the probabilities of those RIRs
+def MakeRoomDict(rir_list):
+    room_dict = {}
+    for rir in rir_list:
+        room = room_dict.get(rir.room_id)
+        if room is None:
+            room = lambda: None  # a bare function object serves as the attribute holder
+            room.rir_list = []
+            room.probability = 0
+            room_dict[rir.room_id] = room
+        room.rir_list.append(rir)
+
+    # accumulate the room probabilities once all the RIRs are assigned
+    for room in room_dict.values():
+        room.probability = sum(rir.probability for rir in room.rir_list)
+    return room_dict
+
+
+# This function creates the point-source noise list
+# and the isotropic noise list from the noise information file
+# Each noise item in the list contains the following attributes:
+# noise_id, noise_type, bg_fg_type, room_linkage, probability, noise_file_location
+# Please refer to the help messages in the parser for the meaning of these attributes
+def ParseNoiseList(noise_list_file):
+    noise_parser = argparse.ArgumentParser()
+    noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
+    noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
+    noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise', choices = ["background", "foreground"])
+    noise_parser.add_argument('--room-linkage', type=str, default=None, help='required if isotropic, should not be specified if point-source.')
+    noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.')
+    noise_parser.add_argument('noise_file_location', type=str, help='noise file location')
+
+    pointsource_noise_list = []
+    iso_noise_list = []
+    with open(noise_list_file) as f:  # the handle is closed once the list has been read
+        for line in f:
+            if not line.strip(): continue  # a blank line would make parse_args() abort with a usage error
+            noise = noise_parser.parse_args(line.split())
+            if noise.noise_type == "isotropic":
+                if noise.room_linkage is None:
+                    raise Exception("--room-linkage must be specified if --noise-type is isotropic")
+                iso_noise_list.append(noise)
+            else:
+                pointsource_noise_list.append(noise)
+
+    return (SmoothProbabilityDistribution(pointsource_noise_list),
+            SmoothProbabilityDistribution(iso_noise_list))
+
+
+def Main():
+    """Entry point: parses the options and the RIR/noise lists, then writes the reverberated data directory."""
+    args = GetArgs()
+    random.seed(args.random_seed)
+    rir_list = ParseRirList(args.rir_list_file)
+    pointsource_noise_list, iso_noise_list = [], []  # stay empty (instead of unbound) without --noise-list-file
+    if args.noise_list_file is not None:
+        pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file)
+        print("Number of point-source noises is {0}".format(len(pointsource_noise_list)))
+        print("Number of isotropic noises is {0}".format(len(iso_noise_list)))
+    room_dict = MakeRoomDict(rir_list)
+    CreateReverberatedCopy(input_dir = args.input_dir,
+                   output_dir = args.output_dir,
+                   room_dict = room_dict,
+                   pointsource_noise_list = pointsource_noise_list,
+                   iso_noise_list = iso_noise_list,
+                   foreground_snr_string = args.foreground_snr_string,
+                   background_snr_string = args.background_snr_string,
+                   num_replicas = args.num_replicas,
+                   prefix = args.prefix,
+                   speech_rvb_probability = args.speech_rvb_probability,
+                   isotropic_noise_addition_probability = args.isotropic_noise_addition_probability,
+                   pointsource_noise_addition_probability = args.pointsource_noise_addition_probability,
+                   max_noises_per_minute = args.max_noises_per_minute)
+
+if __name__ == "__main__":
+    Main()
+
diff --git a/src/feat/signal.cc b/src/feat/signal.cc
index e8fbb0b84cf..a206d399804 100644
--- a/src/feat/signal.cc
+++ b/src/feat/signal.cc
@@ -34,22 +34,25 @@ void ElementwiseProductOfFft(const Vector<BaseFloat> &a, Vector<BaseFloat> *b) {
 void ConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
   int32 signal_length = signal->Dim();
   int32 filter_length = filter.Dim();
-  Vector<BaseFloat> signal_padded(signal_length + filter_length - 1);
+  int32 output_length = signal_length + filter_length - 1;
+  Vector<BaseFloat> signal_padded(output_length);
   signal_padded.SetZero();
   for (int32 i = 0; i < signal_length; i++) {
     for (int32 j = 0; j < filter_length; j++) {
         signal_padded(i + j) += (*signal)(i) * filter(j);
     }
   }
-  signal->CopyFromVec(signal_padded.Range(0, signal_length));
+  signal->Resize(output_length);
+  signal->CopyFromVec(signal_padded);
 }
 
 
 void FFTbasedConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
   int32 signal_length = signal->Dim();
   int32 filter_length = filter.Dim();
+  int32 output_length = signal_length + filter_length - 1;
 
-  int32 fft_length = RoundUpToNearestPowerOfTwo(signal_length + filter_length - 1);
+  int32 fft_length = RoundUpToNearestPowerOfTwo(output_length);
   KALDI_VLOG(1) << "fft_length for full signal convolution is " << fft_length;
 
   SplitRadixRealFft<BaseFloat> srfft(fft_length);
@@ -67,12 +70,15 @@ void FFTbasedConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat>
   srfft.Compute(signal_padded.Data(), false);
   signal_padded.Scale(1.0 / fft_length);
 
-  signal->CopyFromVec(signal_padded.Range(0, signal_length));
+  signal->Resize(output_length);
+  signal->CopyFromVec(signal_padded.Range(0, output_length));
 }
 
 void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
   int32 signal_length = signal->Dim();
   int32 filter_length = filter.Dim();
+  int32 output_length = signal_length + filter_length - 1;
+  signal->Resize(output_length, kCopyData);
 
   KALDI_VLOG(1) << "Length of the filter is " << filter_length;
 
@@ -91,9 +97,9 @@ void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFl
   temp_pad.SetZero();
   Vector<BaseFloat> signal_block_padded(fft_length);
 
-  for (int32 po = 0; po < signal_length; po += block_length) {
+  for (int32 po = 0; po < output_length; po += block_length) {
     // get a block of the signal
-    int32 process_length = std::min(block_length, signal_length - po);
+    int32 process_length = std::min(block_length, output_length - po);
     signal_block_padded.SetZero();
     signal_block_padded.Range(0, process_length).CopyFromVec(signal->Range(po, process_length));
 
@@ -105,17 +111,17 @@ void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFl
     signal_block_padded.Scale(1.0 / fft_length);
 
     // combine the block
-    if (po + block_length < signal_length) {       // current block is not the last block
+    if (po + block_length < output_length) {       // current block is not the last block
       signal->Range(po, block_length).CopyFromVec(signal_block_padded.Range(0, block_length));
       signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad);
       temp_pad.CopyFromVec(signal_block_padded.Range(block_length, filter_length - 1));
     } else {
-      signal->Range(po, signal_length - po).CopyFromVec(
-                        signal_block_padded.Range(0, signal_length - po));
-      if (filter_length - 1 < signal_length - po)
+      signal->Range(po, output_length - po).CopyFromVec(
+                        signal_block_padded.Range(0, output_length - po));
+      if (filter_length - 1 < output_length - po)
         signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad);
       else
-        signal->Range(po, signal_length - po).AddVec(1.0, temp_pad.Range(0, signal_length - po));
+        signal->Range(po, output_length - po).AddVec(1.0, temp_pad.Range(0, output_length - po));
     }
   }
 }
diff --git a/src/feat/signal.h b/src/feat/signal.h
index 7ff0ce33b52..c6c3eb50530 100644
--- a/src/feat/signal.h
+++ b/src/feat/signal.h
@@ -25,6 +25,13 @@
 
 namespace kaldi {
 
+/* 
+   The following three functions provide the same functionality but differ
+   in implementation, and therefore in efficiency. After the convolution,
+   the length of the signal will be extended to (original signal length +
+   filter length - 1).
+*/
+
 /*
    This function implements a simple non-FFT-based convolution of two signals.
    It is suggested to use the FFT-based convolution function which is more
diff --git a/src/featbin/wav-reverberate.cc b/src/featbin/wav-reverberate.cc
index d7599c5ea3d..c19bc21cd84 100644
--- a/src/featbin/wav-reverberate.cc
+++ b/src/featbin/wav-reverberate.cc
@@ -28,7 +28,8 @@ namespace kaldi {
    This function is to repeatedly concatenate signal1 by itself 
    to match the length of signal2 and add the two signals together.
 */
-void AddVectorsOfUnequalLength(const Vector<BaseFloat> &signal1, Vector<BaseFloat> *signal2) {
+void AddVectorsOfUnequalLength(const VectorBase<BaseFloat> &signal1,
+                                     Vector<BaseFloat> *signal2) {
   for (int32 po = 0; po < signal2->Dim(); po += signal1.Dim()) {
     int32 block_length = signal1.Dim();
     if (signal2->Dim() - po < block_length) block_length = signal2->Dim() - po;
@@ -36,6 +37,18 @@ void AddVectorsOfUnequalLength(const Vector<BaseFloat> &signal1, Vector<BaseFloa
   }
 }
 
+/*
+   This function adds signal1 to signal2, starting at the given offset
+   (in samples) into signal2. It does not extend the length of signal2.
+*/
+void AddVectorsWithOffset(const VectorBase<BaseFloat> &signal1, int32 offset,
+                                             Vector<BaseFloat> *signal2) {
+  // Clip to the part of signal1 that still fits in signal2 after the offset.
+  int32 add_length = std::min(signal2->Dim() - offset, signal1.Dim());
+  if (add_length > 0)  // non-positive: offset is at/past the end, add nothing
+    signal2->Range(offset, add_length).AddVec(1.0, signal1.Range(0, add_length));
+
+
 BaseFloat MaxAbsolute(const Vector<BaseFloat> &vector) {
   return std::max(std::abs(vector.Max()), std::abs(vector.Min()));
 }
@@ -71,29 +84,46 @@ BaseFloat ComputeEarlyReverbEnergy(const Vector<BaseFloat> &rir, const Vector<Ba
 }
 
 /*
-   This is the core function to do reverberation and noise addition
-   on the given signal. The noise will be scaled before the addition
-   to match the given signal-to-noise ratio (SNR) and it will also concatenate
-   itself repeatedly to match the length of the signal.
+   This is the core function to do reverberation on the given signal.
    The input parameters to this function are the room impulse response,
-   the sampling frequency, the SNR(dB), the noise and the signal respectively.
+   the sampling frequency and the signal respectively.
+   The length of the signal will be extended to (original signal length +
+   rir length - 1) after the reverberation.
 */
-void DoReverberation(const Vector<BaseFloat> &rir, BaseFloat samp_freq,
-                        BaseFloat snr_db, Vector<BaseFloat> *noise,
+float DoReverberation(const Vector<BaseFloat> &rir, BaseFloat samp_freq,
                         Vector<BaseFloat> *signal) {
-  if (noise->Dim()) {
-    float input_power = ComputeEarlyReverbEnergy(rir, *signal, samp_freq);
-    float noise_power = VecVec(*noise, *noise) / noise->Dim();
-    float scale_factor = sqrt(pow(10, -snr_db / 10) * input_power / noise_power);
-    noise->Scale(scale_factor);
-    KALDI_VLOG(1) << "Noise signal is being scaled with " << scale_factor
-                  << " to generate output with SNR " << snr_db << "db\n";
-  }
-
+  float signal_power = ComputeEarlyReverbEnergy(rir, *signal, samp_freq);
   FFTbasedBlockConvolveSignals(rir, signal);
+  return signal_power;
+}
 
-  if (noise->Dim() > 0) {
-    AddVectorsOfUnequalLength(*noise, signal);
+/*
+   The noise will be scaled before the addition
+   to match the given signal-to-noise ratio (SNR).
+*/
+void AddNoise(Vector<BaseFloat> *noise, BaseFloat snr_db,
+                BaseFloat time, BaseFloat samp_freq,
+                BaseFloat signal_power, Vector<BaseFloat> *signal) {
+  float noise_power = VecVec(*noise, *noise) / std::max<int32>(noise->Dim(), 1);
+  if (noise_power <= 0.0) return;  // empty/all-zero noise: scale is undefined
+  float scale_factor = sqrt(pow(10, -snr_db / 10) * signal_power / noise_power);
+  noise->Scale(scale_factor);  // note: rescales the caller's noise in place
+  KALDI_VLOG(1) << "Noise signal is being scaled with " << scale_factor
+                << " to generate output with SNR " << snr_db << "db\n";
+  AddVectorsWithOffset(*noise, static_cast<int32>(time * samp_freq), signal);
+}
+
+/*
+   This function converts a comma-separated string into a vector of floats.
+*/
+void ReadCommaSeparatedCommand(const std::string &s,
+                                std::vector<BaseFloat> *v) {
+  std::vector<std::string> split_string;
+  SplitStringToVector(s, ",", true, &split_string);  // true: skip empty fields
+  for (size_t i = 0; i < split_string.size(); i++) {
+    v->push_back(0.0);  // placeholder, overwritten in place by the parsed value
+    if (!ConvertStringToReal(split_string[i], &(v->back())))
+      KALDI_ERR << "Invalid number in list '" << s << "': " << split_string[i];
   }
 }
 }
@@ -107,23 +137,36 @@ int main(int argc, char *argv[]) {
         "room-impulse response (rir_matrix) and additive noise distortions\n"
         "(specified by corresponding files).\n"
         "Usage:  wav-reverberate [options...] <wav-in-rxfilename> "
-        "<rir-rxfilename> <wav-out-wxfilename>\n"
+        "<wav-out-wxfilename>\n"
         "e.g.\n"
-        "wav-reverberate --noise-file=noise.wav \\\n"
-        "  input.wav rir.wav output.wav\n";
+        "wav-reverberate --duration=t --impulse-response=rir.wav "
+        "--additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' "
+        "--start-times='s1,s2' input.wav output.wav\n";
 
     ParseOptions po(usage);
-    std::string noise_file;
-    BaseFloat snr_db = 20;
+    std::string rir_file;
+    std::string additive_signals;
+    std::string snrs;
+    std::string start_times;
     bool multi_channel_output = false;
+    bool shift_output = true;
     int32 input_channel = 0;
     int32 rir_channel = 0;
     int32 noise_channel = 0;
     bool normalize_output = true;
     BaseFloat volume = 0;
+    BaseFloat duration = 0;
 
     po.Register("multi-channel-output", &multi_channel_output,
                 "Specifies if the output should be multi-channel or not");
+    po.Register("shift-output", &shift_output,
+                "If true, the reverberated waveform will be shifted by the "
+                "amount of the peak position of the RIR and the length of "
+                "the output waveform will be equal to the input waveform."
+                "If false, the length of the output waveform will be "
+                "equal to (original input length + rir length - 1). "
+                "This value is true by default and "
+                "it only affects the output when RIR file is provided.");
     po.Register("input-wave-channel", &input_channel,
                 "Specifies the channel to be used from input as only a "
                 "single channel will be used to generate reverberated output");
@@ -133,14 +176,30 @@ int main(int argc, char *argv[]) {
     po.Register("noise-channel", &noise_channel,
                 "Specifies the channel of the noise file, "
                 "it will only be used when multi-channel-output is false");
-    po.Register("noise-file", &noise_file,
-                "File with additive noise");
-    po.Register("snr-db", &snr_db,
-                "Desired SNR(dB) of the output");
+    po.Register("impulse-response", &rir_file,
+                "File with the impulse response for reverberating the input wave");
+    po.Register("additive-signals", &additive_signals,
+                "A comma separated list of additive signals");
+    po.Register("snrs", &snrs,
+                "A comma separated list of SNRs. The additive signals will be "
+                "scaled according to these SNRs.");
+    po.Register("start-times", &start_times,
+                "A comma separated list of start times referring to the "
+                "input signal. The additive signals will be added to the "
+                "input signal starting at the offset. If the start time "
+                "exceed the length of the input signal, the addition will "
+                "be ignored.");
     po.Register("normalize-output", &normalize_output,
                 "If true, then after reverberating and "
                 "possibly adding noise, scale so that the signal "
                 "energy is the same as the original input signal.");
+    po.Register("duration", &duration,
+                "If nonzero, it specified the duration (secs) of the output "
+                "signal. If the duration t is less than the length of the "
+                "input signal, the first t secs of the signal is trimed, "
+                "otherwise, the signal will be repeated to "
+                "fulfill the duration specified. This option is useful for "
+                "extending the length of isotropic noises.");
     po.Register("volume", &volume,
                 "If nonzero, a scaling factor on the signal that is applied "
                 "after reverberating and possibly adding noise. "
@@ -148,7 +207,7 @@ int main(int argc, char *argv[]) {
                 "if you had also specified --normalize-output=false.");
 
     po.Read(argc, argv);
-    if (po.NumArgs() != 3) {
+    if (po.NumArgs() != 2) {
       po.PrintUsage();
       exit(1);
     }
@@ -160,13 +219,14 @@ int main(int argc, char *argv[]) {
     }
 
     std::string input_wave_file = po.GetArg(1);
-    std::string rir_file = po.GetArg(2);
-    std::string output_wave_file = po.GetArg(3);
+    std::string output_wave_file = po.GetArg(2);
 
     WaveData input_wave;
     {
+      WaveHolder waveholder;
       Input ki(input_wave_file);
-      input_wave.Read(ki.Stream());
+      waveholder.Read(ki.Stream());
+      input_wave = waveholder.Value();
     }
 
     const Matrix<BaseFloat> &input_matrix = input_wave.Data();
@@ -178,45 +238,73 @@ int main(int argc, char *argv[]) {
                   << " #channel: " << num_input_channel;
     KALDI_ASSERT(input_channel < num_input_channel);
 
-    WaveData rir_wave;
-    {
-      Input ki(rir_file);
-      rir_wave.Read(ki.Stream());
-    }
-    const Matrix<BaseFloat> &rir_matrix = rir_wave.Data();
-    BaseFloat samp_freq_rir = rir_wave.SampFreq();
-    int32 num_samp_rir = rir_matrix.NumCols(),
-          num_rir_channel = rir_matrix.NumRows();
-    KALDI_VLOG(1) << "sampling frequency of rir: " << samp_freq_rir
-                  << " #samples: " << num_samp_rir
-                  << " #channel: " << num_rir_channel;
-    if (!multi_channel_output) {
-      KALDI_ASSERT(rir_channel < num_rir_channel);
-    }
-
-    Matrix<BaseFloat> noise_matrix;
-    if (!noise_file.empty()) {
-      WaveData noise_wave;
+    Matrix<BaseFloat> rir_matrix;
+    BaseFloat samp_freq_rir = samp_freq_input;
+    int32 num_samp_rir = 0,
+          num_rir_channel = 0;
+    if (!rir_file.empty()) {
+      WaveData rir_wave;
       {
-        Input ki(noise_file);
-        noise_wave.Read(ki.Stream());
+        WaveHolder waveholder;
+        Input ki(rir_file);
+        waveholder.Read(ki.Stream());
+        rir_wave = waveholder.Value();
       }
-      noise_matrix = noise_wave.Data();
-      BaseFloat samp_freq_noise = noise_wave.SampFreq();
-      int32 num_samp_noise = noise_matrix.NumCols(),
-            num_noise_channel = noise_matrix.NumRows();
-      KALDI_VLOG(1) << "sampling frequency of noise: " << samp_freq_noise
-                    << " #samples: " << num_samp_noise
-                    << " #channel: " << num_noise_channel;
-      if (multi_channel_output) {
-        KALDI_ASSERT(num_rir_channel == num_noise_channel);
-      } else {
-        KALDI_ASSERT(noise_channel < num_noise_channel);
+      rir_matrix = rir_wave.Data();
+      samp_freq_rir = rir_wave.SampFreq();
+      num_samp_rir = rir_matrix.NumCols();
+      num_rir_channel = rir_matrix.NumRows();
+      KALDI_VLOG(1) << "sampling frequency of rir: " << samp_freq_rir
+                    << " #samples: " << num_samp_rir
+                    << " #channel: " << num_rir_channel;
+      if (!multi_channel_output) {
+        KALDI_ASSERT(rir_channel < num_rir_channel);
+      }
+    }
+
+    std::vector<Matrix<BaseFloat> > additive_signal_matrices;
+    if (!additive_signals.empty()) {
+      std::vector<std::string> split_string;
+      SplitStringToVector(additive_signals, ",", true, &split_string);
+      for (size_t i = 0; i < split_string.size(); i++) {
+        WaveHolder waveholder;
+        Input ki(split_string[i]);
+        waveholder.Read(ki.Stream());
+        WaveData additive_signal_wave = waveholder.Value();
+        Matrix<BaseFloat> additive_signal_matrix = additive_signal_wave.Data();
+        BaseFloat samp_freq = additive_signal_wave.SampFreq();
+        KALDI_ASSERT(samp_freq == samp_freq_input);
+        int32 num_samp = additive_signal_matrix.NumCols(),
+              num_channel = additive_signal_matrix.NumRows();
+        KALDI_VLOG(1) << "sampling frequency of additive signal: " << samp_freq
+                      << " #samples: " << num_samp
+                      << " #channel: " << num_channel;
+        if (multi_channel_output) {
+          KALDI_ASSERT(num_rir_channel == num_channel);
+        } else {
+          KALDI_ASSERT(noise_channel < num_channel);
+        }
+
+        additive_signal_matrices.push_back(additive_signal_matrix);
       }
     }
 
+    std::vector<BaseFloat> snr_vector;
+    if (!snrs.empty()) {
+      ReadCommaSeparatedCommand(snrs, &snr_vector);
+    }
+
+    std::vector<BaseFloat> start_time_vector;
+    if (!start_times.empty()) {
+      ReadCommaSeparatedCommand(start_times, &start_time_vector);
+    }
+
+    int32 shift_index = 0;
     int32 num_output_channels = (multi_channel_output ? num_rir_channel : 1);
-    Matrix<BaseFloat> out_matrix(num_output_channels, num_samp_input);
+    int32 num_samp_output = (duration > 0 ? samp_freq_input * duration :
+                              (shift_output ? num_samp_input :
+                                              num_samp_input + num_samp_rir - 1));
+    Matrix<BaseFloat> out_matrix(num_output_channels, num_samp_output);
 
     for (int32 output_channel = 0; output_channel < num_output_channels; output_channel++) {
       Vector<BaseFloat> input(num_samp_input);
@@ -224,18 +312,31 @@ int main(int argc, char *argv[]) {
       float power_before_reverb = VecVec(input, input) / input.Dim();
 
       int32 this_rir_channel = (multi_channel_output ? output_channel : rir_channel);
-      Vector<BaseFloat> rir(num_samp_rir);
-      rir.CopyRowFromMat(rir_matrix, this_rir_channel);
-      rir.Scale(1.0 / (1 << 15));
 
-      Vector<BaseFloat> noise(0);
-      if (!noise_file.empty()) {
-        noise.Resize(noise_matrix.NumCols());
-        int32 this_noise_channel = (multi_channel_output ? output_channel : noise_channel);
-        noise.CopyRowFromMat(noise_matrix, this_noise_channel);
+      float early_energy = power_before_reverb;
+      if (!rir_file.empty()) {
+        Vector<BaseFloat> rir;
+        rir.Resize(num_samp_rir);
+        rir.CopyRowFromMat(rir_matrix, this_rir_channel);
+        rir.Scale(1.0 / (1 << 15));
+        early_energy = DoReverberation(rir, samp_freq_rir, &input);
+        if (shift_output) {
+          // find the position of the peak of the impulse response 
+          // and shift the output waveform by this amount
+          rir.Max(&shift_index);
+        }
       }
 
-      DoReverberation(rir, samp_freq_rir, snr_db, &noise, &input);
+      if (additive_signal_matrices.size() > 0) {
+        Vector<BaseFloat> noise(0);
+        int32 this_noise_channel = (multi_channel_output ? output_channel : noise_channel);
+        for (int32 i = 0; i < additive_signal_matrices.size(); i++) {
+          noise.Resize(additive_signal_matrices[i].NumCols());
+          noise.CopyRowFromMat(additive_signal_matrices[i], this_noise_channel);
+          AddNoise(&noise, snr_vector[i], start_time_vector[i],
+                    samp_freq_input, early_energy, &input);
+        }
+      }
 
       float power_after_reverb = VecVec(input, input) / input.Dim();
 
@@ -244,7 +345,16 @@ int main(int argc, char *argv[]) {
       else if (normalize_output)
         input.Scale(sqrt(power_before_reverb / power_after_reverb));
 
-      out_matrix.CopyRowFromVec(input, output_channel);
+      if (num_samp_output <= num_samp_input) {
+        // trim the signal from the start
+        out_matrix.CopyRowFromVec(input.Range(shift_index, num_samp_output), output_channel);
+      } else {
+        // repeat the signal to fill up the duration
+        Vector<BaseFloat> extended_input(num_samp_output);
+        extended_input.SetZero();
+        AddVectorsOfUnequalLength(input.Range(shift_index, num_samp_input), &extended_input);
+        out_matrix.CopyRowFromVec(extended_input, output_channel);
+      }
     }
 
     WaveData out_wave(samp_freq_input, out_matrix);