-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmake_raw_wav.py
161 lines (137 loc) · 5.6 KB
/
make_raw_wav.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# make_raw_wav.py
#
# This script
# -- converts .wv1 (NIST) files into .wav (RIFF) files using sph2pipe
# -- makes a file list (JSON metadata) for the converted .wav files
#
# Import packages
import argparse
import json
import os
import re
from pathlib import Path
from scipy.io import wavfile
from audio import read as read_audio
# Import original sources
from config_path import get_paths
from utils import ExtendedEncoder, write_wav
# Original functions
def line2path(config_path, line_text, subset_name):
    """Translate one line of an ``.ndx`` index file into conversion paths.

    The line is lower-cased and matched against the pattern
    ``<disc>: [/]<prefix>wsj{0,1}<dirs><speaker:3><rest:5>[.wv1]`` where
    ``<disc>`` is written ``XX_X_X`` in the index but ``XX-X.X`` on disk.

    Args:
        config_path: Object exposing ``wsj0_root`` / ``wsj1_root`` (joined
            with ``os.path.join``) and ``raw_wav["path"]`` (must support the
            ``/`` operator, e.g. a ``pathlib.Path``).
        line_text: A single line of the index file, without line ending.
        subset_name: Name of the subset; used as a folder in the output path.

    Returns:
        ``False`` when the line holds no usable path (no match, or the
        utterance belongs to the excluded ``11-2.1/wsj0/si_tr_s/401``
        folder). Otherwise the list
        ``[full_input_path, full_output_path, speaker_id, utterance_id]``,
        where ``speaker_id`` is the first three characters of the
        eight-character ``utterance_id``.
    """
    # Notice: we need to
    # -- substitute the disc name XX_X_X with XX-X.X
    # -- remove the data inside 11-2.1/wsj0/si_tr_s/401
    # -- be careful: the path in the file may start with /wsjX OR wsjX
    pattern = (
        r"(\d+)_(\d+)_(\d+): */?([\w/]*)(wsj[01])([\w/]*)"
        r"([0-9a-z]{3})([0-9a-z]{5})(\.wv1|)"
    )
    m = re.match(pattern, line_text.lower())
    if m is None:
        # Path is not written in this line
        return False

    (
        disc_a,
        disc_b,
        disc_c,
        prefix,
        corpus,
        middle,
        speaker_id,
        utterance_tail,
        _extension,
    ) = m.groups()
    utterance_id = speaker_id + utterance_tail

    # The regex only admits "wsj0" or "wsj1", so the matching root attribute
    # (wsj0_root / wsj1_root) can be looked up directly from the corpus name.
    corpus_root = getattr(config_path, corpus + "_root")
    full_input_path = os.path.join(
        corpus_root,
        disc_a + "-" + disc_b + "." + disc_c,
        prefix + corpus + middle + utterance_id + ".wv1",
    )

    # We need to remove this folder from si284 or si84. os.path.join inserts
    # the platform separator, so normalise to "/" before the substring test;
    # this is a no-op where os.sep is already "/".
    if "11-2.1/wsj0/si_tr_s/401" in full_input_path.replace(os.sep, "/"):
        return False

    full_output_path = (
        config_path.raw_wav["path"] / subset_name / (utterance_id + ".wav")
    )
    return [full_input_path, full_output_path, speaker_id, utterance_id]
def make_raw_wav(config, config_path):
    """Convert every indexed utterance to ``.wav`` and dump per-subset metadata.

    For each subset in ``config_path.subset_list`` the function reads the
    subset's ``.ndx`` files, resolves each line with ``line2path``, reads the
    input audio with ``read_audio``, writes it with ``write_wav`` into
    numbered sub-folders, and finally writes a JSON file describing all
    converted utterances of the subset.

    Nothing is done when the raw-wav output directory already exists.

    Args:
        config: Loaded configuration; not used by this function but kept in
            the signature for callers.
        config_path: Object providing ``output_path``, ``original_path``,
            ``raw_wav`` (keys ``"path"`` and ``"metadata_file"``),
            ``subset_list``, ``ndx_list``, ``subset_n_files`` and
            ``max_file_per_folder``.

    Returns:
        None.
    """
    raw_wav_root = config_path.output_path / config_path.raw_wav["path"]
    if raw_wav_root.exists():
        print("Original raw directory already exists. Skip.")
        return

    for subset_key in config_path.subset_list:
        print(f"{subset_key}: started")

        # Make sure the directory for this subset's output exists
        subset_rel_dir = config_path.raw_wav["path"] / subset_key
        (config_path.output_path / subset_rel_dir).mkdir(parents=True, exist_ok=True)

        raw_wav_dict = {}
        global_file_index = 0
        for path_ndx in config_path.ndx_list[subset_key]:
            # Read the .ndx file line by line
            with open(config_path.original_path / path_ndx, mode="r") as f:
                for line in f:
                    # Get paths for input .wv1 and output .wav
                    wav_info = line2path(
                        config_path, line.rstrip("\r\n"), subset_key
                    )
                    if not wav_info:
                        continue
                    input_path, output_path, speaker_id, utterance_id = wav_info

                    # Use a numbered sub-folder to avoid having too many
                    # files in a single folder
                    subfolder_idx = (
                        global_file_index // config_path.max_file_per_folder
                    )
                    output_path = (
                        output_path.parent
                        / f"{subfolder_idx:03d}"
                        / output_path.name
                    )
                    global_file_index += 1

                    # Read input, write output
                    frame_rate, audio = read_audio(
                        config_path.original_path / input_path
                    )
                    write_wav(
                        config_path.output_path / output_path, frame_rate, audio
                    )

                    # Add info into dict (paths are stored relative to the
                    # output / original roots, as produced above)
                    raw_wav_dict[utterance_id] = {
                        "raw_wav_path": output_path,
                        "original_wv1_path": input_path,
                        "speaker_id": speaker_id,
                        "utterance_id": utterance_id,
                        "n_samples": audio.shape[0],
                        "frame_rate": frame_rate,
                    }

        # Dump info dict into json
        path_raw_wav_json = subset_rel_dir / config_path.raw_wav["metadata_file"]
        with open(config_path.output_path / path_raw_wav_json, mode="w") as f:
            # The extended encoder is passed so that values the default JSON
            # encoder rejects (e.g. the Path objects stored above) can be saved
            json.dump(raw_wav_dict, f, indent=4, cls=ExtendedEncoder)

        # Subset finished
        n_expected = config_path.subset_n_files[subset_key]
        print(
            f"{subset_key}: finished converting "
            f"{len(raw_wav_dict)}/{n_expected} files"
        )
        if len(raw_wav_dict) != n_expected:
            print(
                "Error: the number of files actually "
                "converted is different from expected!"
            )
# main
def _run_cli():
    """Parse the command line, load the JSON configuration and run the conversion.

    Expects three positional arguments: the configuration file, the folder
    holding the original datasets, and the destination folder for the output.
    """
    cli = argparse.ArgumentParser(description="Creates all the configuration files")
    cli.add_argument("config", type=Path, help="Path to configuration file")
    cli.add_argument(
        "original_dataset_paths",
        type=Path,
        help="Path to folders containing original datasets",
    )
    cli.add_argument(
        "output_path", type=Path, help="Path to destination folder for the output"
    )
    options = cli.parse_args()

    with open(options.config, "r") as config_file:
        settings = json.load(config_file)

    # Resolve all dataset/output locations from the configuration and CLI paths
    resolved_paths = get_paths(
        config=settings,
        original_path=options.original_dataset_paths,
        output_path=options.output_path,
    )
    make_raw_wav(settings, resolved_paths)


if __name__ == "__main__":
    _run_cli()