kaldi-asr · tomkocse · Apr 17, 2016 · Apr 22, 2016 · May 5, 2016 · May 8, 2016
diff --git a/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py b/egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
@@ -23,38 +23,48 @@ def GetArgs():
     return args
 
 
-# This function generate the rir_list file for the aspire real RIR
+# This function generates the rir_list file for the real RIRs being used in the ASpIRE experiments.
+# It assumes the availability of the data/impulses_noises directory prepared by local/multi_condition/prepare_impulses_noises.sh
 def GenerateRirListFile(input_dir, output_dir):
   rir_list_file = open(output_dir + "/rir_list", 'w')
   rir_id = 1
-  room_id = 1
   for db in ["RVB2014", "RWCP", "air"]:
     rir_files = glob.glob(input_dir + "/{0}_*.wav".format(db))
+    rir_files.sort()
     for rir in rir_files:
       filename = rir.split('/')[-1]
       if "noise" not in filename:
-        rir_list_file.write('--rir-id {0} --room-id {1} {2}\n'.format(str(rir_id).zfill(5), str(room_id).zfill(3), rir))
+        parts = filename.split('_')
+        db_name = parts[0]
+        type_num = parts[1]
+        if db == "RVB2014":
+          noise_pattern = parts[3]
+        elif db == "RWCP" and len(parts) == 4:
+          noise_pattern = parts[3]
+        else:
+          noise_pattern = '_'.join(parts[3:len(parts)-1])
+
+        # The room id is the string "<db_name>_<noise_pattern>"
+        room_id = db_name + "_" + noise_pattern
+        rir_list_file.write('--rir-id {0} --room-id {1} {2}\n'.format(str(rir_id).zfill(5), room_id, rir))
         rir_id += 1
-    room_id += 1
   rir_list_file.close()
 
 
 # This function generate the noise_list file from the aspire noise-rir pair 
 def GenerateNoiseListFile(input_dir, output_dir):
   noise_list_file = open(output_dir + "/noise_list", 'w')
   noise_files = glob.glob(input_dir + "/*_type*_noise*.wav")
+  noise_files.sort()
   noise_id = 1
   for noise_file in noise_files:
     parts = noise_file.split('/')[-1].split('_')
     db_name = parts[0]
     type_num = parts[1]
     noise_pattern = '_'.join(parts[3:len(parts)-1])
-    if db_name == "RWCP":
-      type_num = "type*"
-    matched_rir_files = glob.glob(input_dir + "/{0}_{1}_rir_{2}*.wav".format(db_name, type_num, noise_pattern))
     noise_line = "--noise-id {0} --noise-type isotropic ".format(str(noise_id).zfill(5))
-    for rir in matched_rir_files:
-      noise_line += "--rir-linkage {0} ".format(rir)
+    room_id = db_name + "_" + noise_pattern
+    noise_line += "--room-linkage {0} ".format(room_id)
     noise_line += "{0}".format(noise_file)
     noise_list_file.write("{0}\n".format(noise_line))
     noise_id += 1

diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
@@ -6,7 +6,6 @@
 . cmd.sh
 
 stage=1
-snrs="20:10:15:5:0"
 foreground_snrs="20:10:15:5:0"
 background_snrs="20:10:15:5:0"
 num_data_reps=3
@@ -34,6 +33,7 @@ if [ $stage -le 1 ]; then
     data/impulses_noises || exit 1;
 
   # Generate the rir_list and noise_list for the reverberate_data_dir.py to corrupt the data
+  # this script assumes that only the air, RWCP and RVB2014 databases are used
   python local/multi_condition/aspire_prep_rir_noise_list.py data/impulses_noises data/impulses_noises/info
 
   # corrupt the fisher data to generate multi-condition data 

diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py
@@ -137,19 +137,33 @@ def WriteDictToFile(dict, file_name):
                 value = list(value)
             value.sort()
             value = ' '.join(str(value))
-        file.write('{0}\t{1}\n'.format(key, value))
+        file.write('{0} {1}\n'.format(key, value))
     file.close()
 
 
-# This function returns only the isotropic noises according to the specified RIR id
+# This function creates the utt2uniq file from the utterance ids in the utt2spk file
+def CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix):
+    corrupted_utt2uniq = {}
+    # Parse the utt2spk to get the utterance id
+    utt2spk = ParseFileToDict(input_dir + "/utt2spk", value_processor = lambda x: " ".join(x))
+    keys = utt2spk.keys()
+    keys.sort()
+    for i in range(1, num_replicas+1):
+        for utt_id in keys:
+            new_utt_id = GetNewId(utt_id, prefix, i)
+            corrupted_utt2uniq[new_utt_id] = utt_id
+
+    WriteDictToFile(corrupted_utt2uniq, output_dir + "/utt2uniq")
+
+
+# This function returns all the isotropic noises whose room_linkage matches the specified room
 # Please refer to ParseNoiseList() for the format of iso_noise_list
-def FilterIsotropicNoiseList(iso_noise_list, rir_id):
+def FilterIsotropicNoiseList(iso_noise_list, room_id):
     filtered_list = []
     for noise in iso_noise_list:
-        for id in noise.rir_linkage:
-            if id == rir_id:
-                filtered_list.append(noise)
-                break
+        # no break here: several noises can be linked to the same room
+        if noise.room_linkage == room_id:
+            filtered_list.append(noise)
 
     return filtered_list
 
@@ -206,7 +220,7 @@ def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to
         # pick the RIR to reverberate the speech
         reverberate_opts += "--impulse-response={0} ".format(speech_rir.rir_file_location)
 
-    rir_iso_noise_list = FilterIsotropicNoiseList(iso_noise_list, speech_rir.rir_id)
+    rir_iso_noise_list = FilterIsotropicNoiseList(iso_noise_list, speech_rir.room_id)
     # Add the corresponding isotropic noise associated with the selected RIR
     if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability:
         isotropic_noise = PickItemWithProbability(rir_iso_noise_list)
@@ -267,9 +281,9 @@ def GenerateReverberatedWavScp(wav_scp,  # a dictionary whose values are the Kal
     foreground_snrs = list_cyclic_iterator(foreground_snr_array)
     background_snrs = list_cyclic_iterator(background_snr_array)
     corrupted_wav_scp = {}
+    keys = wav_scp.keys()
+    keys.sort()
     for i in range(1, num_replicas+1):
-        keys = wav_scp.keys()
-        keys.sort()
         for recording_id in keys:
             wav_original_pipe = wav_scp[recording_id]
             # check if it is really a pipe
@@ -350,6 +364,13 @@ def CreateReverberatedCopy(input_dir,
     data_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
                     .format(output_dir = output_dir))
 
+    if os.path.isfile(input_dir + "/utt2uniq"):
+        AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, prefix, field =[0])
+    else:
+        # Create the utt2uniq file
+        CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix)
+
+
     if os.path.isfile(input_dir + "/text"):
         AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, prefix, field =[0])
     if os.path.isfile(input_dir + "/segments"):
@@ -426,27 +447,17 @@ def MakeRoomDict(rir_list):
     return room_dict
 
 
-# This function check if the RIR IO string is listed in the input rir_list file
-# It returns the RIR id if the io string is found
-def ValidateRirIO(rir_io_str, rir_list):
-    for rir in rir_list:
-        if rir_io_str == rir.rir_file_location:
-            return rir.rir_id
-
-    return "Not found"
-
-
 # This function creates the point-source noise list 
 # and the isotropic noise list from the noise information file
 # Each noise item in the list contains the following attributes:
-# noise_id, noise_type, bg_fg_type, rir_linkage, probability, noise_file_location
+# noise_id, noise_type, bg_fg_type, room_linkage, probability, noise_file_location
 # Please refer to the help messages in the parser for the meaning of these attributes
-def ParseNoiseList(noise_list_file, rir_list):
+def ParseNoiseList(noise_list_file):
     noise_parser = argparse.ArgumentParser()
     noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
     noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
     noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise', choices = ["background", "foreground"])
-    noise_parser.add_argument('--rir-linkage', type=str, action='append', default=None, help='required if isotropic, should not be specified if point-source, this option can be repeatly added to define multiple noise-rir association, the rir linkage can either be a RIR id or a RIR file path')
+    noise_parser.add_argument('--room-linkage', type=str, default=None, help='required if isotropic, should not be specified if point-source.')
     noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.')
     noise_parser.add_argument('noise_file_location', type=str, help='noise file location')
 
@@ -456,18 +467,9 @@ def ParseNoiseList(noise_list_file, rir_list):
     for line in noise_lines:
         noise = noise_parser.parse_args(line.split())
         if noise.noise_type == "isotropic":
-            if noise.rir_linkage is None:
-                raise Exception("--rir-linkage must be specified if --noise-type is isotropic")
+            if noise.room_linkage is None:
+                raise Exception("--room-linkage must be specified if --noise-type is isotropic")
             else:
-                for r in range(0, len(noise.rir_linkage)):
-                    if not noise.rir_linkage[r].isdigit():
-                       # this is a RIR IO string, validate if it exist in the input rir_list and return the RIR id
-                       result = ValidateRirIO(noise.rir_linkage[r], rir_list)
-                       if result == "Not found":
-                           raise Exception("RIR {0} specified by isotropic noise {1} not found".format(noise.rir_linkage[r], noise.noise_id))
-                       else:
-                           noise.rir_linkage[r] = result
-
                 iso_noise_list.append(noise)
         else:
             pointsource_noise_list.append(noise)
@@ -482,7 +484,7 @@ def Main():
     rir_list = ParseRirList(args.rir_list_file)
     noise_list = []
     if args.noise_list_file is not None:
-        pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file, rir_list)
+        pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file)
         print("Number of point-source noises is {0}".format(len(pointsource_noise_list)))
         print("Number of isotropic noises is {0}".format(len(iso_noise_list)))
     room_dict = MakeRoomDict(rir_list)