# save_raw_fea.py -- from a fork of mravanelli/pytorch-kaldi
##########################################################
# pytorch-kaldi v.0.1
# Mirco Ravanelli, Titouan Parcollet
# Mila, University of Montreal
# October 2018
#
# Description: This script generates kaldi ark files containing raw features.
# The file list must be a file containing "snt_id file.wav".
# Note that only wav files are supported here (sphere or other formats are not supported).
##########################################################
import scipy.io.wavfile
import math
import numpy as np
import os
from data_io import read_vec_int_ark, write_mat
# ---------------------------------------------------------------------------
# Paths. Run the script once per data chunk (e.g., train, dev, test): keep one
# group of settings active and leave the other groups commented out.
#   lab_folder   : folder searched for "ali*.gz" and "final.mdl"
#   lab_opts     : command placed in the label-reading pipe
#   wav_lst      : list file with one "snt_id file.wav" entry per line
#   out_folder   : folder receiving one "<snt_id>.ark" file per sentence
#   scp_file_out : scp index written alongside the ark files
# ---------------------------------------------------------------------------
lab_opts = "ali-to-pdf"
lab_folder = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_test"
wav_lst = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/data/test/wav.lst"
out_folder = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/data/raw_TIMIT_200ms/test"
scp_file_out = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/data/raw_TIMIT_200ms/test/feats_raw.scp"

# Alternative settings (dev chunk):
# lab_folder='quick_test/dnn4_pretrain-dbn_dnn_ali_dev'
# lab_opts='ali-to-pdf'
# out_folder='raw_TIMIT_200ms/dev'
# wav_lst='/home/mirco/pytorch-kaldi-new/quick_test/data/dev/wav_lst.scp'
# scp_file_out='quick_test/data/dev/feats_raw.scp'

# Alternative settings (test chunk):
# lab_folder='quick_test/dnn4_pretrain-dbn_dnn_ali_test'
# lab_opts='ali-to-pdf'
# out_folder='raw_TIMIT_200ms/test'
# wav_lst='/home/mirco/pytorch-kaldi-new/quick_test/data/test/wav_lst.scp'
# scp_file_out='quick_test/data/test/feats_raw.scp'

# Raw-signal window saved for every label frame.
sig_fs = 16000  # Hz
sig_wlen = 200  # ms

# Framing of the labels (window length and shift).
lab_fs = 16000  # Hz
lab_wlen = 25  # ms
lab_wshift = 10  # ms

# The same durations expressed in samples (rate * milliseconds / 1000).
sig_wlen_samp, lab_wlen_samp, lab_wshift_samp = (
    int((rate_hz * length_ms) / 1000)
    for rate_hz, length_ms in ((sig_fs, sig_wlen), (lab_fs, lab_wlen), (lab_fs, lab_wshift))
)
# Create the output folder (and any missing parent folders).
# ``exist_ok=True`` makes this a no-op when the folder already exists, while
# genuine failures (e.g. missing permissions, or the path being a regular
# file) are reported instead of being swallowed by a bare ``except``.
os.makedirs(out_folder, exist_ok=True)
def _extract_context_windows(signal, win_len, lab_len, lab_shift):
    """Cut one zero-padded window of raw samples per label frame.

    Label frame ``i`` covers samples ``[i * lab_shift, i * lab_shift + lab_len)``.
    Row ``i`` of the result holds ``win_len`` samples taken around the centre
    of that frame; positions of the window that fall before the start or
    after the end of the signal are left at zero.

    Args:
        signal: 1-D array of samples.
        win_len: length of every extracted window, in samples.
        lab_len: length of a label frame, in samples.
        lab_shift: shift between consecutive label frames, in samples.

    Returns:
        Float array of shape ``(n_frames, win_len)``, where ``n_frames`` is
        the number of label frames that fit entirely inside the signal
        (0 when the signal is shorter than one label frame).
    """
    n_samples = signal.shape[0]
    if n_samples < lab_len:
        return np.zeros((0, win_len))

    # Every frame that fits entirely in the signal is kept, including the one
    # that ends exactly on the last sample.
    n_frames = 1 + (n_samples - lab_len) // lab_shift
    half_win = win_len // 2
    windows = np.zeros((n_frames, win_len))

    for fr in range(n_frames):
        centre = fr * lab_shift + lab_len // 2 - 1  # central sample of the label frame
        win_beg = centre - half_win  # may be negative near the start
        # Clip the window to the signal and copy the overlap to the matching
        # offset; whatever lies outside the signal stays zero. This covers
        # left overhang, right overhang, and both at once (short signals).
        src_beg = max(win_beg, 0)
        src_end = min(win_beg + win_len, n_samples)
        dst_beg = src_beg - win_beg
        windows[fr, dst_beg : dst_beg + (src_end - src_beg)] = signal[src_beg:src_end]

    return windows


# Read the labels.
# NOTE(review): ``lab`` is only referenced by the optional debug print at the
# end of the loop below -- confirm whether loading it is still needed.
lab = dict(
    read_vec_int_ark(
        "gunzip -c " + lab_folder + "/ali*.gz | " + lab_opts + " " + lab_folder + "/final.mdl ark:- ark:-|", out_folder
    )
)

# Read the list file ("snt_id file.wav" per line), ignoring blank lines.
with open(wav_lst) as f:
    sig_lst = [line.strip() for line in f if line.strip()]

# Create the scp file; the ``with`` block closes it even if an utterance fails.
with open(scp_file_out, "w") as scp_file:
    for sig_file in sig_lst:
        # Split on any run of whitespace; the remainder of the line is the path.
        fields = sig_file.split(None, 1)
        if len(fields) != 2:
            raise ValueError("Malformed line in " + wav_lst + ": " + repr(sig_file))
        sig_id, sig_path = fields

        [fs, signal] = scipy.io.wavfile.read(sig_path)
        if fs != sig_fs:
            # The window sizes in samples are derived from sig_fs/lab_fs, so a
            # different rate silently changes the window duration.
            print("WARNING: " + sig_id + " is sampled at " + str(fs) + " Hz, expected " + str(sig_fs) + " Hz")

        # Scale by 2**15 (presumably 16-bit PCM input -- TODO confirm), then
        # normalise to unit peak amplitude. An empty or all-zero signal is
        # left untouched to avoid a division by zero (NaN features).
        signal = signal.astype(float) / 32768
        peak = np.max(np.abs(signal)) if signal.size else 0.0
        if peak > 0:
            signal = signal / peak

        frame_all = _extract_context_windows(signal, sig_wlen_samp, lab_wlen_samp, lab_wshift_samp)
        if frame_all.shape[0] == 0:
            print("WARNING: " + sig_id + " is shorter than one label frame, skipped")
            continue

        # Save the matrix into a kaldi ark
        out_file = out_folder + "/" + sig_id + ".ark"
        write_mat(out_folder, out_file, frame_all, key=sig_id)
        print(sig_id)

        # scp entry: "<id> <ark path>:<offset>", with the offset set to
        # len(id) + 1, i.e. just past the "<id> " prefix in the ark file.
        scp_file.write(sig_id + " " + out_file + ":" + str(len(sig_id) + 1) + "\n")

        # Debug aid: the number of label frames is expected to match the
        # number of extracted windows (TODO confirm for the alignments used).
        # print("%s %i " % (lab[sig_id].shape[0], frame_all.shape[0]))