Skip to content

Commit

Permalink
update readme
Browse files Browse the repository at this point in the history
  • Loading branch information
haoheliu committed Oct 1, 2021
1 parent ef5cb1c commit 7ac6299
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,17 @@ from voicefixer import Vocoder
voicefixer = VoiceFixer()
# Speech restoration

# Mode 0
# Mode 0: Original Model (suggested by default)
voicefixer.restore(input="", # input wav file path
output="", # output wav file path
cuda=False, # whether to use gpu acceleration
mode = 0) # You can try out mode 0, 1, 2 to find out the best result
# Mode 1
# Mode 1: Add preprocessing module (remove higher frequency)
voicefixer.restore(input="", # input wav file path
output="", # output wav file path
cuda=False, # whether to use gpu acceleration
mode = 1) # You can try out mode 0, 1, 2 to find out the best result
# Mode 2
# Mode 2: Train mode (might work sometimes on seriously degraded real speech)
voicefixer.restore(input="", # input wav file path
output="", # output wav file path
cuda=False, # whether to use gpu acceleration
Expand Down
4 changes: 2 additions & 2 deletions test/streamlit.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def init_voicefixer():
st.write('Inference : ')

# choose options
mode = st.radio('Voice fixer mode (0: rm high frequency, 1: none, 2: train fixer)', [0, 1, 2])
mode = st.radio('Voice fixer modes (0: original mode, 1: Add preprocessing module 2: Train mode (may work sometimes on seriously degraded speech))', [0, 1, 2])
if torch.cuda.is_available():
is_cuda = st.radio('Turn on GPU', [True, False])
if is_cuda != list(voice_fixer._model.parameters())[0].is_cuda:
Expand Down Expand Up @@ -62,5 +62,5 @@ def init_voicefixer():
# make buffer
with BytesIO() as buffer:
soundfile.write(buffer, pred_wav.T, samplerate=sample_rate, format='WAV')
st.write("Time: {:.3f}".format(pred_time))
st.write("Time: {:.3f}s".format(pred_time))
st.audio(buffer.getvalue(), format='audio/wav')
11 changes: 6 additions & 5 deletions voicefixer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,18 @@ def remove_higher_frequency(self, wav, ratio=0.95):
stft = librosa.stft(wav)
real, img = np.real(stft), np.imag(stft)
mag = (real ** 2 + img ** 2) ** 0.5
cos, sin = real / mag, img / mag
cos, sin = real / (mag+EPS), img / (mag+EPS)
spec = np.abs(stft) # [1025,T]
feature = spec.copy()
feature = np.log10(feature)
feature = np.log10(feature+EPS)
feature[feature < 0] = 0
energy_level = np.sum(feature, axis=1)
threshold = np.sum(energy_level) * ratio
curent_level, i = energy_level[0], 0
while (i < energy_level.shape[0] and curent_level < threshold):
curent_level += energy_level[i + 1, ...]
i += 1
print(i)
spec[i:, ...] = np.zeros_like(spec[i:, ...])
stft = spec * cos + 1j * spec * sin
return librosa.istft(stft)
Expand All @@ -92,13 +93,13 @@ def restore_inmem(self, wav_10k, cuda=False, mode=0, your_vocoder_func=None):
elif(mode == 2):
self._model.train() # More effective on seriously damaged speech

if(mode == 0):
wav_10k = self.remove_higher_frequency(wav_10k)
res = []
seg_length = 44100*60
seg_length = 44100*30
break_point = seg_length
while break_point < wav_10k.shape[0]+seg_length:
segment = wav_10k[break_point-seg_length:break_point]
if (mode == 1):
segment = self.remove_higher_frequency(segment)
sp,mel_noisy = self._pre(self._model, segment, cuda)
out_model = self._model(sp, mel_noisy)
denoised_mel = from_log(out_model['mel'])
Expand Down

0 comments on commit 7ac6299

Please sign in to comment.