-
Notifications
You must be signed in to change notification settings - Fork 8
/
pitchfilter.m
129 lines (110 loc) · 3.85 KB
/
pitchfilter.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
function [y,f,g,m] = pitchfilter(d, sr, method, do_crossfade, bpf_r, doplot)
% [Y,F,G,M] = pitchfilter(D,SR,METHOD,CROSSFADE,BPF,DOPLOT)
%    Enhance a signal by filtering at the harmonics of the detected
%    pitch.  D@SR is a waveform; run noise-robust pitch tracking
%    (via SAcC), then resample the waveform so each frame maps the
%    detected pitch to 100 Hz.  Then apply a mechanism depending
%    on METHOD to emphasize all the harmonics of 100 Hz, then undo
%    the resampling to get back to the original pitch.
%
%    Inputs:
%      D          waveform (it is transposed before being passed to
%                 resample_map, so presumably a column vector -
%                 TODO confirm against callers).
%      SR         sampling rate of D.
%      METHOD     enhancement applied to the pitch-flattened signal;
%                 one of 'comb' (default), 'median', 'wiener' or
%                 'pvsmooth'.  Any other value raises an error.
%      CROSSFADE  if set (default 1), the reconstruction fades back
%                 to an attenuated copy of the original when voicing
%                 is low.
%      BPF        if greater than zero, the radius of a BPF pole used
%                 to reduce blurring in the flat-f0 spectral channels
%                 (default 0 = no band-pass filtering).
%      DOPLOT     if set, generates graphical output (default 0).
%
%    Outputs:
%      Y  the enhanced signal, resampled back to the original pitch
%         (and crossfaded with the original if CROSSFADE is set).
%      F  the resampled (flattened) signal.
%      G  F after filtering.
%      M  the map from D's timebase to F's.
%
% 2014-05-01 Dan Ellis dpwe@ee.columbia.edu

if nargin < 3 ; method = 'comb'; end
if nargin < 4 ; do_crossfade = 1; end
if nargin < 5 ; bpf_r = 0; end
if nargin < 6 ; doplot = 0; end

% (1) Run SAcC pitch tracker with the noisy rats classifier, and
% with the unvoiced state discounted, to get a near-continuous
% pitch track.
[pitch, pvx, times] = sacc_pitchtrack(d, sr);

% Make all frames have a pitch: frames reported as zero are given the
% target pitch, so they are mapped with a time-scaling factor of 1.
target_pitch = 100.0;
pitch(pitch == 0) = target_pitch;

% Build the time mapping.
% The map has two rows; the first row indicates a point in the original
% timebase, and the corresponding element in the second row is where it
% ends up.  Each inter-frame interval is scaled by pitch/target_pitch
% and the scaled intervals are accumulated from times(1).
vmap = [times' ; ...
        times(1) + [0, cumsum( pitch(1:end-1)'/target_pitch ...
                               .* diff(times') ) ] ];

% Resample waveform to flatten pitches to target_pitch
dm = resample_map(d', sr, vmap);

% Enhance components at the target pitch period.  Every method's output
% goes through derumble, so that call is made once after the dispatch.
if strcmp(method, 'comb')
    % Period of the target pitch, in samples
    enhanced = enhance_period(dm, round(sr/target_pitch));
elseif strcmp(method, 'median')
    win_t = 15; % median filter time window, in steps (original note
                % suggests ~8 ms per step - unconfirmed)
    win_f = 7;  % local average window in frequency (center pt not used)
    enhanced = sgram_enhance(dm, win_t, win_f);
elseif strcmp(method, 'wiener')
    enhanced = wiener_icsi(dm, sr);
elseif strcmp(method, 'pvsmooth')
    enhanced = smoothsgram(dm, sr);
else
    error(['Unrecognized method - ', method]);
end
dmf = derumble(enhanced, sr);

if bpf_r > 0
    % STFTM-dB-domain band-pass filtering to reduce blurriness
    dmf = env_bpf(dmf, bpf_r);
end

% Resample back to original domain using the inverse time map
du = resample_map(dmf, sr, inv_map(vmap));

% Crossfade based on pvx (interpolated up to sr)
% NOTE(review): du is transposed only on this path, so the orientation
% of Y may differ depending on CROSSFADE - confirm against crossfade().
if do_crossfade
    % Number of samples per pitch-track frame
    pvx_dsfact = round(sr*median(diff(times)));
    origgain = 0.1;  % crossfade to attenuated original
    du = crossfade(du', origgain*derumble(d, sr), pvx, pvx_dsfact);
end

% Return values
y = du;
f = dm;
g = dmf;
m = vmap;

% Plotting carved out to make code more readable
if doplot
    %figure(1)
    sg_fft = 1024;
    sg_olp = sg_fft - sg_fft/8;
    cax = [-40 20];
    % just pitch region
    fmax = 1000;

    % Panel 1: input signal with pitch track and voicing overlaid
    subplot(411)
    specgram(d, sg_fft, sr, sg_fft, sg_olp);
    title('Noisy signal');
    caxis(cax);
    colormap(1-gray);
    hold on;
    % pvx is scaled and offset so it shares the frequency axis
    plot(times, pitch, '-r', times, 500+pvx*450, '-g');
    legend('pitch', 'smoothed pvx');
    hold off;
    axis([0 length(d)/sr 0 fmax]);

    % Panel 2: pitch-flattened signal
    subplot(412)
    specgram(dm, sg_fft, sr, sg_fft, sg_olp);
    title(['resampled to pitch = ', num2str(target_pitch), ' Hz']);
    caxis(cax);
    colormap(1-gray);
    axis([0 length(dm)/sr 0 fmax]);

    % Panel 3: flattened signal after enhancement
    subplot(413)
    specgram(dmf, sg_fft, sr, sg_fft, sg_olp);
    title(['Filtered - ',method]);
    caxis(cax);
    colormap(1-gray);
    % NOTE(review): time limit uses length(dm), not length(dmf); the two
    % agree only if the enhancement preserves length - confirm intent.
    axis([0 length(dm)/sr 0 fmax]);

    % Panel 4: final output back at the original pitch
    subplot(414)
    specgram(du, sg_fft, sr, sg_fft, sg_olp);
    if do_crossfade
        title('Resampled back to original pitch and mixed with original');
    else
        title('Resampled back to original pitch');
    end
    caxis(cax);
    colormap(1-gray);
    axis([0 length(du)/sr 0 fmax]);
end