-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathprep_data_nus.py
executable file
·153 lines (88 loc) · 3.96 KB
/
prep_data_nus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# from __future__ import division
import os,re
import collections
import soundfile as sf
import numpy as np
from scipy.stats import norm
import pyworld as pw
import matplotlib.pyplot as plt
import sys
import h5py
import librosa
import config
import utils
def process_lab_file(filename, stft_len, phoneme_inventory=None,
                     frame_period=0.005804576860324892):
    """Convert a phoneme label file into a per-frame column of phoneme indices.

    Every non-blank line of the label file must contain exactly three
    whitespace-separated fields: start time, end time and phoneme symbol.
    Times are divided by ``frame_period`` and rounded to obtain frame indices.
    The symbols ``pau``, ``br`` and ``sil`` are all mapped to ``Sil`` before
    being looked up in the phoneme inventory.

    Args:
        filename: Path of the label file to read.
        stft_len: Unused; kept so existing callers keep working.
        phoneme_inventory: Sequence of phoneme symbols; the position of a
            symbol in this sequence is the value written for its frames.
            Defaults to ``config.phonemas_nus``.
        frame_period: Duration of one frame, in the same unit as the
            timestamps in the file (presumably seconds, matching the STFT hop
            used elsewhere in the project -- confirm against ``utils.stft``).

    Returns:
        A float ``numpy`` array of shape ``(end_frame_of_last_entry, 1)``
        holding the phoneme index of every frame. Frames not covered by any
        entry stay at 0.

    Raises:
        ValueError: If the file has no entries, a line does not have exactly
            three fields, a timestamp is not numeric, or a symbol is missing
            from the phoneme inventory.
    """
    if phoneme_inventory is None:
        phoneme_inventory = config.phonemas_nus

    segments = []
    # The context manager guarantees the handle is released even if reading fails.
    with open(filename) as lab_f:
        for line in lab_f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            start, end, symbol = line.split()
            first_frame = int(np.round(float(start) / frame_period))
            last_frame = int(np.round(float(end) / frame_period))
            if symbol in ('pau', 'br', 'sil'):
                symbol = 'Sil'
            segments.append((first_frame, last_frame, symbol))

    if not segments:
        raise ValueError("no phoneme entries found in %s" % filename)

    # The output length is taken from the end frame of the last entry.
    strings_p = np.zeros((segments[-1][1], 1))
    for first_frame, last_frame, symbol in segments:
        # The end frame is included; a following entry that starts on that
        # frame overwrites it, and slicing clips anything past the array end.
        strings_p[first_frame:last_frame + 1] = phoneme_inventory.index(symbol)
    return strings_p
def _write_dataset(hdf5_file, name, shape, dtype, values):
    """Create dataset ``name`` in ``hdf5_file`` (replacing any old one) and fill it."""
    if name in hdf5_file:
        # The file is opened in append mode, so a previous run may have left
        # this dataset behind; create_dataset refuses to reuse an existing name.
        del hdf5_file[name]
    dataset = hdf5_file.create_dataset(name, shape, dtype)
    dataset[...] = values


def _process_recordings(singer, wav_dir, kind, role):
    """Extract features for every ``.wav`` in ``wav_dir`` and store them as HDF5.

    ``kind`` (``'sing'`` or ``'read'``) becomes part of the output file name;
    ``role`` is only used in the progress message.
    """
    wav_files = [x for x in os.listdir(wav_dir)
                 if x.endswith('.wav') and not x.startswith('.')]
    print("Processing %s %s" % (role, singer))

    for count, lf in enumerate(wav_files, start=1):
        stem = lf[:-4]  # file name without the '.wav' extension

        # librosa.load down-mixes to mono by default, so the result is always
        # one-dimensional and no explicit stereo handling is required.
        audio, fs = librosa.core.load(os.path.join(wav_dir, lf), sr=config.fs)
        vocals = np.asarray(audio, dtype=np.float64)

        voc_stft = abs(utils.stft(vocals))
        out_feats = utils.stft_to_feats(vocals, fs)
        # The phoneme labels live next to the audio, with a '.txt' extension.
        strings_p = process_lab_file(os.path.join(wav_dir, stem + '.txt'), len(voc_stft))

        voc_stft, out_feats, strings_p = utils.match_time([voc_stft, out_feats, strings_p])

        out_path = config.voice_dir + 'nus_' + singer + '_' + kind + '_' + stem + '.hdf5'
        # The context manager closes the file even if a write fails.
        with h5py.File(out_path, mode='a') as hdf5_file:
            _write_dataset(hdf5_file, "phonemes", (voc_stft.shape[0],), int, strings_p[:, 0])
            _write_dataset(hdf5_file, "voc_stft", voc_stft.shape, np.float32, voc_stft)
            _write_dataset(hdf5_file, "feats", out_feats.shape, np.float32, out_feats)

        utils.progress(count, len(wav_files))


def main():
    """Build one HDF5 feature file per recording found under ``config.wav_dir_nus``.

    Every sub-directory of ``config.wav_dir_nus`` is treated as one singer
    with a ``sing/`` and a ``read/`` folder. For each ``.wav`` file in those
    folders the magnitude STFT, the features returned by
    ``utils.stft_to_feats`` and the per-frame phoneme indices (read from the
    ``.txt`` file with the same stem) are aligned with ``utils.match_time``
    and written to
    ``config.voice_dir + 'nus_<singer>_<sing|read>_<stem>.hdf5'`` as the
    datasets ``phonemes``, ``voc_stft`` and ``feats``. Datasets that already
    exist in an output file are replaced, so the script can be re-run.
    """
    singers = next(os.walk(config.wav_dir_nus))[1]

    for singer in singers:
        sing_dir = config.wav_dir_nus + singer + '/sing/'
        read_dir = config.wav_dir_nus + singer + '/read/'

        _process_recordings(singer, sing_dir, 'sing', 'singer')
        _process_recordings(singer, read_dir, 'read', 'reader')
# Run the preprocessing only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()