-
Notifications
You must be signed in to change notification settings - Fork 5.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: A new steps/data/reverberate_data_dir.py script #706
Changes from all commits
356fa82
8671e59
99b4210
0b7f06c
1068ec4
fdb576d
3802fde
970def5
d335c71
ebfba00
617982b
93a2295
cbe5762
d34f597
4431af6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#!/usr/bin/env python | ||
# Copyright 2016 Tom Ko | ||
# Apache 2.0 | ||
# script to generate rir_list and noise_list in aspire | ||
|
||
# we're using python 3.x style print but want it to work in python 2.x, | ||
from __future__ import print_function | ||
import argparse, glob, math, os, sys | ||
|
||
|
||
def GetArgs():
    """Parse the command line of this list-preparation script.

    Two positional arguments are accepted: the input data directory and the
    output data directory.  The raw command line is echoed to stdout before
    parsing so that it shows up in logs even if parsing fails.

    Returns:
        The argparse namespace with attributes `input_dir` and `output_dir`.
    """
    # The usage text previously named reverberate_data_dir.py and advertised
    # "[options...]"; this script is aspire_prep_rir_noise_list.py and takes
    # only the two positional arguments below.
    parser = argparse.ArgumentParser(description="Prepare rir_list and noise_list for Aspire "
                                     "Usage: aspire_prep_rir_noise_list.py <in-data-dir> <out-data-dir> "
                                     "E.g. aspire_prep_rir_noise_list.py "
                                     "data/impulses_noises data/impulses_noises/info",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("input_dir", help="Input data directory")
    parser.add_argument("output_dir", help="Output data directory")

    # Echo the invocation before parse_args(), which may exit on bad input.
    print(' '.join(sys.argv))
    args = parser.parse_args()

    return args
|
||
|
||
# This function generates the rir_list file for the real RIRs being used in ASpIRE experiments. | ||
# It assumes the availability of data/impulses_noises directory prepared by local/multi_condition/prepare_impulses_noises.sh | ||
def GenerateRirListFile(input_dir, output_dir):
    """Write <output_dir>/rir_list, one line per RIR wav file in input_dir.

    For each database prefix (RVB2014, RWCP, air, in that order) the files
    matching <input_dir>/<db>_*.wav are visited in sorted order.  Files whose
    name contains "noise" are skipped.  Every other file yields one line

        --rir-id <zero-padded 5-digit id> --room-id <db>_<pattern> <path>

    with ids assigned sequentially from 1 across all databases.  The room id
    pattern is taken from the '_'-separated fields of the file name:
    field 3 for RVB2014 and for 4-field RWCP names, otherwise fields 3 up to
    (not including) the last one joined by '_'.

    Raises:
        IndexError: if an RVB2014 file name has fewer than four '_'-separated
            fields.
    """
    # NOTE(review): parsing the file name for these parameters is a stop-gap;
    # a reviewer indicated a later list-generation function is expected to
    # replace it.  The parsing rules are assumed correct and left unchanged.
    # NOTE(review): in the 4-field case parts[3] still carries the ".wav"
    # extension -- confirm this matches the room ids written to noise_list.
    rir_id = 1
    # The context manager closes the list file even if parsing raises.
    with open(output_dir + "/rir_list", 'w') as rir_list_file:
        for db in ["RVB2014", "RWCP", "air"]:
            for rir in sorted(glob.glob(input_dir + "/{0}_*.wav".format(db))):
                filename = os.path.basename(rir)
                if "noise" in filename:
                    # Noise recordings are listed by GenerateNoiseListFile.
                    continue
                parts = filename.split('_')
                db_name = parts[0]
                if db == "RVB2014" or (db == "RWCP" and len(parts) == 4):
                    noise_pattern = parts[3]
                else:
                    noise_pattern = '_'.join(parts[3:-1])

                # We use the string as the room id
                room_id = db_name + "_" + noise_pattern
                rir_list_file.write('--rir-id {0} --room-id {1} {2}\n'.format(
                    str(rir_id).zfill(5), room_id, rir))
                rir_id += 1
|
||
|
||
# This function generates the noise_list file from the aspire noise-rir pairs | ||
def GenerateNoiseListFile(input_dir, output_dir):
    """Write <output_dir>/noise_list, one line per noise wav file in input_dir.

    Files matching <input_dir>/*_type*_noise*.wav are visited in sorted order
    and each yields one line

        --noise-id <zero-padded 5-digit id> --noise-type isotropic --room-linkage <db>_<pattern> <path>

    with ids assigned sequentially from 1.  <db> is the first '_'-separated
    field of the file name and <pattern> is fields 3 up to (not including)
    the last one, joined by '_'.
    """
    # NOTE(review): a reviewer comment attached to this function is truncated
    # in the available text ("Please do not assign the same ...") -- recover
    # it from the pull request before relying on the ids produced here.
    noise_id = 1
    # The context manager closes the list file even if a write fails.
    with open(output_dir + "/noise_list", 'w') as noise_list_file:
        for noise_file in sorted(glob.glob(input_dir + "/*_type*_noise*.wav")):
            parts = os.path.basename(noise_file).split('_')
            db_name = parts[0]
            noise_pattern = '_'.join(parts[3:-1])
            # Link the noise to the room its paired RIR was recorded in.
            room_id = db_name + "_" + noise_pattern
            noise_line = "--noise-id {0} --noise-type isotropic ".format(str(noise_id).zfill(5))
            noise_line += "--room-linkage {0} ".format(room_id)
            noise_line += "{0}".format(noise_file)
            noise_list_file.write("{0}\n".format(noise_line))
            noise_id += 1
|
||
|
||
def Main():
    """Entry point: parse arguments, ensure the output directory exists, then
    write the rir_list and noise_list files consumed by the new
    steps/data/reverberate_data_dir.py."""
    args = GetArgs()
    in_dir, out_dir = args.input_dir, args.output_dir

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # rir_list first, then noise_list; both read in_dir and write to out_dir.
    for generate in (GenerateRirListFile, GenerateNoiseListFile):
        generate(in_dir, out_dir)


if __name__ == "__main__":
    Main()
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,8 @@ | |
. cmd.sh | ||
|
||
stage=1 | ||
snrs="20:10:15:5:0" | ||
foreground_snrs="20:10:15:5:0" | ||
background_snrs="20:10:15:5:0" | ||
num_data_reps=3 | ||
ali_dir=exp/ | ||
db_string="'air' 'rwcp' 'rvb2014'" # RIR dbs to be used in the experiment | ||
|
@@ -31,24 +32,30 @@ if [ $stage -le 1 ]; then | |
--RIR-home $RIR_home \ | ||
data/impulses_noises || exit 1; | ||
|
||
# Generate the rir_list and noise_list for the reverberate_data_dir.py to corrupt the data | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this script just assumes
|
||
# this script just assumes air rwcp rvb2014 databases | ||
python local/multi_condition/aspire_prep_rir_noise_list.py data/impulses_noises data/impulses_noises/info | ||
|
||
# corrupt the fisher data to generate multi-condition data | ||
# for data_dir in train dev test; do | ||
for data_dir in train dev test; do | ||
if [ "$data_dir" == "train" ]; then | ||
num_reps=$num_data_reps | ||
else | ||
num_reps=1 | ||
fi | ||
reverb_data_dirs= | ||
for i in `seq 1 $num_reps`; do | ||
cur_dest_dir=" data/temp_${data_dir}_${i}" | ||
local/multi_condition/reverberate_data_dir.sh --random-seed $i \ | ||
--snrs "$snrs" --log-dir exp/make_corrupted_wav \ | ||
data/${data_dir} data/impulses_noises $cur_dest_dir | ||
reverb_data_dirs+=" $cur_dest_dir" | ||
done | ||
utils/combine_data.sh --extra-files utt2uniq data/${data_dir}_rvb $reverb_data_dirs | ||
rm -rf $reverb_data_dirs | ||
python steps/data/reverberate_data_dir.py \ | ||
--prefix "rev" \ | ||
--rir-list-file data/impulses_noises/info/rir_list \ | ||
--noise-list-file data/impulses_noises/info/noise_list \ | ||
--foreground-snrs $foreground_snrs \ | ||
--background-snrs $background_snrs \ | ||
--speech-rvb-probability 1 \ | ||
--pointsource-noise-addition-probability 1 \ | ||
--isotropic-noise-addition-probability 1 \ | ||
--num-replications $num_reps \ | ||
--max-noises-per-minute 1 \ | ||
--random-seed 1 \ | ||
data/${data_dir} data/${data_dir}_rvb | ||
done | ||
|
||
# create the dev, test and eval sets from the aspire recipe | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import subprocess | ||
|
||
def RunKaldiCommand(command, wait = True):
    """ Runs commands frequently seen in Kaldi scripts. These are usually a
        sequence of commands connected by pipes, so we use shell=True

    Args:
        command: the shell command line to execute.  It is passed to the
            shell verbatim, so it must not be built from untrusted input.
        wait: if True (default), block until the command finishes.

    Returns:
        If wait is True, the tuple (stdout, stderr) exactly as returned by
        Popen.communicate().  If wait is False, the running Popen object;
        the caller is then responsible for reading its pipes and reaping it.

    Raises:
        Exception: if wait is True and the command exits with a non-zero
            return code; the message includes the command and its stderr.
    """
    #logger.info("Running the command\n{0}".format(command))
    p = subprocess.Popen(command, shell = True,
                         stdout = subprocess.PIPE,
                         stderr = subprocess.PIPE)

    if not wait:
        return p

    stdout, stderr = p.communicate()
    # Compare by value: 'is not 0' tests object identity, not equality.
    if p.returncode != 0:
        # On Python 3 the pipe yields bytes, which cannot be concatenated to
        # the str message; decode leniently so the real error is not masked.
        stderr_text = stderr if isinstance(stderr, str) else stderr.decode("utf-8", "replace")
        raise Exception("There was an error while running the command {0}\n".format(command)+"-"*10+"\n"+stderr_text)
    return stdout, stderr
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We will most probably not use this script for long, as we soon plan to create an RIR database and upload it to openslr.org. This script is a stop-gap arrangement to use the new
steps/data/reverberate_data_dir.py
until the new upload is prepared. So I am not going to ensure that this script is very generic, and I will allow for a lot of assumptions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean you are going to upload the real RIR to openslr.org including the RVB2014, air and RWCP databases?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have already prepared around 9 RIR databases for use in our experiments. We have to figure out which of these can be redistributed by us and then prepare an upload.