Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: A new steps/data/reverberate_data_dir.py script #706

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions egs/aspire/s5/local/multi_condition/aspire_prep_rir_noise_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,38 +23,48 @@ def GetArgs():
return args


# This function generate the rir_list file for the aspire real RIR
# This function generates the rir_list file for the real RIRs being used in ASpIRE experiments.
# It assumes the availability of data/impulses_noises directory prepared by local/multi_condition/prepare_impulses_noises.sh
def GenerateRirListFile(input_dir, output_dir):
rir_list_file = open(output_dir + "/rir_list", 'w')
rir_id = 1
room_id = 1
for db in ["RVB2014", "RWCP", "air"]:
rir_files = glob.glob(input_dir + "/{0}_*.wav".format(db))
rir_files.sort()
for rir in rir_files:
filename = rir.split('/')[-1]
if "noise" not in filename:
rir_list_file.write('--rir-id {0} --room-id {1} {2}\n'.format(str(rir_id).zfill(5), str(room_id).zfill(3), rir))
parts = filename.split('_')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than parsing the file name for the necessary parameters, @tomkocse will later submit a modified list generation function which will use information available in data/impulses_noises/info/. This is however not a high priority change as we will preprocess the individual databases in the future and these scripts would not be part of any recipe.

db_name = parts[0]
type_num = parts[1]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am assuming this filename-parsing code is correct.

if db == "RVB2014":
noise_pattern = parts[3]
elif db == "RWCP" and len(parts) == 4:
noise_pattern = parts[3]
else:
noise_pattern = '_'.join(parts[3:len(parts)-1])

# We use the string as the room id
room_id = db_name + "_" + noise_pattern
rir_list_file.write('--rir-id {0} --room-id {1} {2}\n'.format(str(rir_id).zfill(5), room_id, rir))
rir_id += 1
room_id += 1
rir_list_file.close()


# This function generates the noise_list file from the aspire noise-rir pairs
def GenerateNoiseListFile(input_dir, output_dir):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a data/impulses_noises/info which has all the information you need. It defines different rooms and the noises and impulse responses available for that room (e.g. data/impulses_noises/info/noise_impulse_RVB2014_largeroom1).

Please do not assign the same room_id to all RIRs in a given database. This is not the right usage of room_id field.

noise_list_file = open(output_dir + "/noise_list", 'w')
noise_files = glob.glob(input_dir + "/*_type*_noise*.wav")
noise_files.sort()
noise_id = 1
for noise_file in noise_files:
parts = noise_file.split('/')[-1].split('_')
db_name = parts[0]
type_num = parts[1]
noise_pattern = '_'.join(parts[3:len(parts)-1])
if db_name == "RWCP":
type_num = "type*"
matched_rir_files = glob.glob(input_dir + "/{0}_{1}_rir_{2}*.wav".format(db_name, type_num, noise_pattern))
noise_line = "--noise-id {0} --noise-type isotropic ".format(str(noise_id).zfill(5))
for rir in matched_rir_files:
noise_line += "--rir-linkage {0} ".format(rir)
room_id = db_name + "_" + noise_pattern
noise_line += "--room-linkage {0} ".format(room_id)
noise_line += "{0}".format(noise_file)
noise_list_file.write("{0}\n".format(noise_line))
noise_id += 1
Expand Down
2 changes: 1 addition & 1 deletion egs/aspire/s5/local/multi_condition/run_nnet2_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
. cmd.sh

stage=1
snrs="20:10:15:5:0"
foreground_snrs="20:10:15:5:0"
background_snrs="20:10:15:5:0"
num_data_reps=3
Expand Down Expand Up @@ -34,6 +33,7 @@ if [ $stage -le 1 ]; then
data/impulses_noises || exit 1;

# Generate the rir_list and noise_list for the reverberate_data_dir.py to corrupt the data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This script just assumes the air, RWCP and RVB2014 databases.

# this script just assumes air rwcp rvb2014 databases
python local/multi_condition/aspire_prep_rir_noise_list.py data/impulses_noises data/impulses_noises/info

# corrupt the fisher data to generate multi-condition data
Expand Down
72 changes: 37 additions & 35 deletions egs/wsj/s5/steps/data/reverberate_data_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,19 +137,33 @@ def WriteDictToFile(dict, file_name):
value = list(value)
value.sort()
value = ' '.join(str(value))
file.write('{0}\t{1}\n'.format(key, value))
file.write('{0} {1}\n'.format(key, value))
file.close()


# This function returns only the isotropic noises according to the specified RIR id
# This function creates the utt2uniq file from the utterance id in utt2spk file
def CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix):
corrupted_utt2uniq = {}
# Parse the utt2spk to get the utterance id
utt2spk = ParseFileToDict(input_dir + "/utt2spk", value_processor = lambda x: " ".join(x))
keys = utt2spk.keys()
keys.sort()
for i in range(1, num_replicas+1):
for utt_id in keys:
new_utt_id = GetNewId(utt_id, prefix, i)
corrupted_utt2uniq[new_utt_id] = utt_id

WriteDictToFile(corrupted_utt2uniq, output_dir + "/utt2uniq")


# This function returns only the isotropic noises according to the specified room
# Please refer to ParseNoiseList() for the format of iso_noise_list
def FilterIsotropicNoiseList(iso_noise_list, rir_id):
def FilterIsotropicNoiseList(iso_noise_list, room_id):
filtered_list = []
for noise in iso_noise_list:
for id in noise.rir_linkage:
if id == rir_id:
filtered_list.append(noise)
break
if noise.room_linkage == room_id:
filtered_list.append(noise)
break

return filtered_list

Expand Down Expand Up @@ -206,7 +220,7 @@ def GenerateReverberationOpts(room_dict, # the room dictionary, please refer to
# pick the RIR to reverberate the speech
reverberate_opts += "--impulse-response={0} ".format(speech_rir.rir_file_location)

rir_iso_noise_list = FilterIsotropicNoiseList(iso_noise_list, speech_rir.rir_id)
rir_iso_noise_list = FilterIsotropicNoiseList(iso_noise_list, speech_rir.room_id)
# Add the corresponding isotropic noise associated with the selected RIR
if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability:
isotropic_noise = PickItemWithProbability(rir_iso_noise_list)
Expand Down Expand Up @@ -267,9 +281,9 @@ def GenerateReverberatedWavScp(wav_scp, # a dictionary whose values are the Kal
foreground_snrs = list_cyclic_iterator(foreground_snr_array)
background_snrs = list_cyclic_iterator(background_snr_array)
corrupted_wav_scp = {}
keys = wav_scp.keys()
keys.sort()
for i in range(1, num_replicas+1):
keys = wav_scp.keys()
keys.sort()
for recording_id in keys:
wav_original_pipe = wav_scp[recording_id]
# check if it is really a pipe
Expand Down Expand Up @@ -350,6 +364,13 @@ def CreateReverberatedCopy(input_dir,
data_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
.format(output_dir = output_dir))

if os.path.isfile(input_dir + "/utt2uniq"):
AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, prefix, field =[0])
else:
# Create the utt2uniq file
CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, prefix)


if os.path.isfile(input_dir + "/text"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have to properly prepare an utt2uniq file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the purpose of the utt2uniq file?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

 grep -R steps -e utt2uniq
steps/cleanup/combine_short_segments.py:    utt2uniq = None
steps/cleanup/combine_short_segments.py:    if os.path.exists(data_dir_file('utt2uniq')):
steps/cleanup/combine_short_segments.py:        utt2uniq = ParseFileToDict(data_dir_file('utt2uniq'))
steps/cleanup/combine_short_segments.py:    return utt2spk, spk2utt, text, feat, utt2dur, utt2uniq
steps/cleanup/combine_short_segments.py:def WriteCombinedDirFiles(output_dir, utt2spk, spk2utt, text, feat, utt2dur, utt2uniq):
steps/cleanup/combine_short_segments.py:        # updating utt2uniq
steps/cleanup/combine_short_segments.py:        if utt2uniq is not None:
steps/cleanup/combine_short_segments.py:                combined_uniqs.append(utt2uniq.pop(utt))
steps/cleanup/combine_short_segments.py:            # utt2uniq file is used to map perturbed data to original unperturbed
steps/cleanup/combine_short_segments.py:            # is no good way to find the utt2uniq mappinng so that we can avoid
steps/cleanup/combine_short_segments.py:            utt2uniq[new_utt_name] = combined_uniqs[0]
steps/cleanup/combine_short_segments.py:    if utt2uniq is not None:
steps/cleanup/combine_short_segments.py:        WriteDictToFile(utt2uniq, out_dir_file('utt2uniq'))
steps/cleanup/combine_short_segments.py:    utt2spk, spk2utt, text, feat, utt2dur, utt2uniq = ParseDataDirInfo(input_dir)
steps/cleanup/combine_short_segments.py:    WriteCombinedDirFiles(output_dir, utt2spk, spk2utt, text, feat, utt2dur, utt2uniq)
steps/nnet2/get_egs.sh:if [ -f $data/utt2uniq ]; then
steps/nnet2/get_egs.sh:  echo "File $data/utt2uniq exists, so augmenting valid_uttlist to"
steps/nnet2/get_egs.sh:  utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
steps/nnet2/get_egs.sh:  cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
steps/nnet2/get_perturbed_feats.sh:# In the combined feature directory, create a file utt2uniq which maps
steps/nnet2/get_perturbed_feats.sh:  > $data/utt2uniq
steps/nnet2/get_egs2.sh:if [ -f $data/utt2uniq ]; then
steps/nnet2/get_egs2.sh:  echo "File $data/utt2uniq exists, so augmenting valid_uttlist to"
steps/nnet2/get_egs2.sh:  utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
steps/nnet2/get_egs2.sh:  cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
steps/nnet3/chain/get_egs.sh:if [ -f $data/utt2uniq ]; then  # this matters if you use data augmentation.
steps/nnet3/chain/get_egs.sh:  echo "File $data/utt2uniq exists, so augmenting valid_uttlist to"
steps/nnet3/chain/get_egs.sh:  utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
steps/nnet3/chain/get_egs.sh:  cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
steps/nnet3/get_egs_targets.sh:if [ -f $data/utt2uniq ]; then  # this matters if you use data augmentation.
steps/nnet3/get_egs_targets.sh:  echo "File $data/utt2uniq exists, so augmenting valid_uttlist to"
steps/nnet3/get_egs_targets.sh:  utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
steps/nnet3/get_egs_targets.sh:  cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
steps/nnet3/get_egs.sh:if [ -f $data/utt2uniq ]; then  # this matters if you use data augmentation.
steps/nnet3/get_egs.sh:  echo "File $data/utt2uniq exists, so augmenting valid_uttlist to"
steps/nnet3/get_egs.sh:  utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
steps/nnet3/get_egs.sh:  cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
steps/nnet3/get_egs_discriminative.sh:if [ -f $data/utt2uniq ]; then  # this matters if you use data augmentation.
steps/nnet3/get_egs_discriminative.sh:  echo "File $data/utt2uniq exists, so augmenting valid_uttlist to"
steps/nnet3/get_egs_discriminative.sh:  utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
steps/nnet3/get_egs_discriminative.sh:  cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
steps/online/nnet2/get_egs.sh:if [ -f $data/utt2uniq ]; then
steps/online/nnet2/get_egs.sh:  echo "File $data/utt2uniq exists, so augmenting valid/uttlist to"
steps/online/nnet2/get_egs.sh:  utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
steps/online/nnet2/get_egs.sh:  cat $dir/valid/uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
steps/online/nnet2/get_egs2.sh:if [ -f $data/utt2uniq ]; then
steps/online/nnet2/get_egs2.sh:  echo "File $data/utt2uniq exists, so augmenting valid/uttlist to"
steps/online/nnet2/get_egs2.sh:  utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
steps/online/nnet2/get_egs2.sh:  cat $dir/valid/uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \

AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, prefix, field =[0])
if os.path.isfile(input_dir + "/segments"):
Expand Down Expand Up @@ -426,27 +447,17 @@ def MakeRoomDict(rir_list):
return room_dict
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Describe the format of room_dict.



# This function check if the RIR IO string is listed in the input rir_list file
# It returns the RIR id if the io string is found
def ValidateRirIO(rir_io_str, rir_list):
for rir in rir_list:
if rir_io_str == rir.rir_file_location:
return rir.rir_id

return "Not found"


# This function creates the point-source noise list
# and the isotropic noise list from the noise information file
# Each noise item in the list contains the following attributes:
# noise_id, noise_type, bg_fg_type, rir_linkage, probability, noise_file_location
# noise_id, noise_type, bg_fg_type, room_linkage, probability, noise_file_location
# Please refer to the help messages in the parser for the meaning of these attributes
def ParseNoiseList(noise_list_file, rir_list):
def ParseNoiseList(noise_list_file):
noise_parser = argparse.ArgumentParser()
noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise', choices = ["background", "foreground"])
noise_parser.add_argument('--rir-linkage', type=str, action='append', default=None, help='required if isotropic, should not be specified if point-source, this option can be repeatly added to define multiple noise-rir association, the rir linkage can either be a RIR id or a RIR file path')
noise_parser.add_argument('--room-linkage', type=str, default=None, help='required if isotropic, should not be specified if point-source.')
noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.')
noise_parser.add_argument('noise_file_location', type=str, help='noise file location')

Expand All @@ -456,18 +467,9 @@ def ParseNoiseList(noise_list_file, rir_list):
for line in noise_lines:
noise = noise_parser.parse_args(line.split())
if noise.noise_type == "isotropic":
if noise.rir_linkage is None:
raise Exception("--rir-linkage must be specified if --noise-type is isotropic")
if noise.room_linkage is None:
raise Exception("--room-linkage must be specified if --noise-type is isotropic")
else:
for r in range(0, len(noise.rir_linkage)):
if not noise.rir_linkage[r].isdigit():
# this is a RIR IO string, validate if it exist in the input rir_list and return the RIR id
result = ValidateRirIO(noise.rir_linkage[r], rir_list)
if result == "Not found":
raise Exception("RIR {0} specified by isotropic noise {1} not found".format(noise.rir_linkage[r], noise.noise_id))
else:
noise.rir_linkage[r] = result

iso_noise_list.append(noise)
else:
pointsource_noise_list.append(noise)
Expand All @@ -482,7 +484,7 @@ def Main():
rir_list = ParseRirList(args.rir_list_file)
noise_list = []
if args.noise_list_file is not None:
pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file, rir_list)
pointsource_noise_list, iso_noise_list = ParseNoiseList(args.noise_list_file)
print("Number of point-source noises is {0}".format(len(pointsource_noise_list)))
print("Number of isotropic noises is {0}".format(len(iso_noise_list)))
room_dict = MakeRoomDict(rir_list)
Expand Down