-
Notifications
You must be signed in to change notification settings - Fork 99
/
cpc_pca.py
49 lines (40 loc) · 1.49 KB
/
cpc_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from sklearn.decomposition import IncrementalPCA
import kaldi_io as ko
import scipy.fftpack as fft
import pickle
# Fit an incremental PCA on CPC features. The original note at this spot
# reads: "fit incremental PCA for reducing dimension from 40 to 24".
# NOTE(review): the script fits IncrementalPCA without an explicit component
# count -- confirm where the 40 -> 24 reduction is meant to happen.

# .scp lists of the original CPC features, one per data split.
cpc_train, cpc_val, cpc_eval = (
    'cpc/cpc-8/cpc_train.scp',
    'cpc/cpc-8/cpc_validation.scp',
    'cpc/cpc-8/cpc_eval.scp',
)

# Output prefixes, presumably for the PCA-transformed features. They were
# disabled in the original (wrapped in a string literal), so they are kept
# here as comments only and are NOT defined at runtime:
#   pca_train = 'cpc/cpc-4/cpc_train'
#   pca_val = 'cpc/cpc-4/cpc_validation'
#   pca_eval = 'cpc/cpc-4/cpc_eval'
def fit_ipca(scp_path, model_path='pca/ipca8.pkl', n_components=None):
    """Fit an IncrementalPCA on every matrix in *scp_path* and pickle it.

    Args:
        scp_path: .scp file handed to ``kaldi_io.read_mat_scp``.
        model_path: File the fitted model is pickled to.
        n_components: Forwarded to ``IncrementalPCA``. The default ``None``
            reproduces the original ``IncrementalPCA()`` call, in which
            scikit-learn picks the component count itself.
            NOTE(review): the note at the top of this script mentions a
            40 -> 24 reduction; pass ``n_components=24`` if that is the
            intended behaviour -- confirm before changing the default.

    Returns:
        The fitted ``IncrementalPCA`` instance.

    Raises:
        ValueError: If *scp_path* yields no matrices (an unfitted model
            would otherwise be pickled silently).
    """
    import os  # local import: keeps this block independent of the import header

    # Make sure the output directory exists *before* training, so a missing
    # folder cannot throw away a complete training pass at pickle time.
    out_dir = os.path.dirname(model_path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    ipca = IncrementalPCA(n_components=n_components)

    # Train the PCA incrementally, one matrix per partial_fit call.
    n_mats = 0
    for _key, mat in ko.read_mat_scp(scp_path):
        ipca.partial_fit(mat)
        n_mats += 1
    if n_mats == 0:
        raise ValueError('no feature matrices found in ' + scp_path)

    with open(model_path, 'wb') as f:
        pickle.dump(ipca, f)
    return ipca


def ark_scp_pipe(out_prefix):
    """Return the kaldi_io output descriptor for *out_prefix*.

    The descriptor pipes written matrices through ``copy-feats`` into
    ``<out_prefix>.ark`` with an accompanying ``<out_prefix>.scp``.

    NOTE(review): the text after ``ark:|`` is a command line, so
    *out_prefix* should only ever come from trusted configuration.
    """
    return ('ark:| copy-feats ark:- ark,scp:'
            + out_prefix + '.ark,' + out_prefix + '.scp')


def transform_scp(ipca, scp_in, out_prefix):
    """Apply ``ipca.transform`` to every matrix in *scp_in* and write it out.

    Args:
        ipca: Fitted model exposing ``transform``.
        scp_in: .scp file read with ``kaldi_io.read_mat_scp``.
        out_prefix: Output path prefix, see ``ark_scp_pipe``.
    """
    with ko.open_or_fd(ark_scp_pipe(out_prefix), 'wb') as f:
        for key, mat in ko.read_mat_scp(scp_in):
            pca_mat = ipca.transform(mat)
            ko.write_mat(f, pca_mat, key=key)


def transform_all_splits(ipca,
                         pca_train='cpc/cpc-4/cpc_train',
                         pca_val='cpc/cpc-4/cpc_validation',
                         pca_eval='cpc/cpc-4/cpc_eval'):
    """Transform the validation, eval and train features with *ipca*.

    This replaces three copy-pasted loops that the original script kept
    disabled inside a string literal. The default output prefixes are the
    values the original listed for ``pca_train`` / ``pca_val`` / ``pca_eval``.
    The function is defined but deliberately not called by the script.
    """
    # Same processing order as the original: validation, eval, then train.
    jobs = (
        (cpc_val, pca_val),
        (cpc_eval, pca_eval),
        (cpc_train, pca_train),
    )
    for scp_in, out_prefix in jobs:
        transform_scp(ipca, scp_in, out_prefix)


if __name__ == '__main__':
    # The original ended this stage with a bare exit() (a `site` helper meant
    # for interactive use). It is dropped because nothing after the fit does
    # any work: the transform stage stays disabled, as in the original.
    # Call transform_all_splits(ipca) here to turn it back on.
    ipca = fit_ipca(cpc_train)