-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict.py
87 lines (67 loc) · 1.99 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import matplotlib.pyplot as plt
import torch
import numpy as np
from mel2lpc.utils import plot, plot_spec, load_wav, save_wav
from mel2lpc.mel2lpc_torch import Audio2Mel, Mel2LPC, LPC2Wav, PreEmphasis
# ---------------------------------------------------------------------------
# Mel -> LPC prediction demo.
#
# Loads one recording, converts it to a mel spectrogram, derives LPC
# coefficients from that mel, runs the LPC predictor over the waveform, then
# saves and plots the prediction together with the residual
# (original - prediction).
# ---------------------------------------------------------------------------

# --- Configuration ----------------------------------------------------------
wav_name = 'wavs/vox_1_0.wav'  # input recording
sample_rate = 44100            # passed to load_wav/save_wav and both mel stages
n_fft = 2048
num_mels = 128
hop_length = 512
win_length = 2048
lpc_order = 14                 # shared by Mel2LPC and LPC2Wav
clip_lpc = True                # forwarded unchanged to LPC2Wav
mel_fmin = 40                  # presumably Hz — confirm against Audio2Mel
mel_fmax = 16000               # presumably Hz — confirm against Audio2Mel

# Audio2Mel and Mel2LPC take the same spectral settings; keeping them in one
# place guarantees the two stages can never drift apart.
_spectral_kwargs = dict(
    sampling_rate=sample_rate,
    hop_length=hop_length,
    win_length=win_length,
    n_fft=n_fft,
    n_mel_channels=num_mels,
    mel_fmin=mel_fmin,
    mel_fmax=mel_fmax,
)

# --- Load -------------------------------------------------------------------
wav_data = load_wav(wav_name, sample_rate)
# Two singleton axes are inserted in front; the code below indexes the sample
# axis as shape[2], i.e. it assumes load_wav returned a 1-D signal so that the
# result is (1, 1, num_samples) — TODO confirm.
wav_data = torch.tensor(wav_data).unsqueeze(0).unsqueeze(1)

# --- Analysis and prediction ------------------------------------------------
# This script only performs inference. Disabling autograd ensures the results
# never require grad (otherwise the later .numpy() calls would raise if any of
# the modules hold trainable parameters) and avoids building a graph.
with torch.no_grad():
    preemph = PreEmphasis(coefficient=0.9375)
    preemph_data = preemph(wav_data)

    a2w = Audio2Mel(**_spectral_kwargs)
    mel = a2w(preemph_data)

    m2l = Mel2LPC(lpc_order=lpc_order, **_spectral_kwargs)
    LPC_ctrl_mel = m2l(mel.transpose(1, 2))

    # NOTE(review): the mel is computed from the pre-emphasised signal, while
    # LPC2Wav is given the raw waveform — confirm this is intended.
    wav_pred_mel = LPC2Wav(
        LPC_ctrl_mel, wav_data, lpc_order=lpc_order, clip_lpc=clip_lpc
    )

# Make sure the predicted audio and the original audio have the same length
# by truncating both to the shorter of the two along the sample axis.
num_samples = min(wav_pred_mel.shape[2], wav_data.shape[2])
wav_pred_mel = wav_pred_mel[:, :, :num_samples]
wav_data = wav_data[:, :, :num_samples]

# Compute the residual (what the LPC predictor failed to explain).
residual = wav_data - wav_pred_mel

# Drop the two leading singleton axes and move to NumPy for saving/plotting.
wav_data = wav_data.squeeze(0).squeeze(0).numpy()
wav_pred_mel = wav_pred_mel.squeeze(0).squeeze(0).numpy()
error = residual.squeeze(0).squeeze(0).numpy()

# --- Save -------------------------------------------------------------------
save_wav(wav_pred_mel, 'wavs/pred.wav', sample_rate)
save_wav(error, 'wavs/error.wav', sample_rate)

# --- Plot -------------------------------------------------------------------
# Three stacked panels: original, prediction, residual.
fig = plt.figure(figsize=(30, 5))
for position, (label, signal) in enumerate(
    (
        ('wav_data', wav_data),
        ('wav_pred_mel', wav_pred_mel),
        ('error', error),
    ),
    start=1,
):
    plt.subplot(3, 1, position)
    plt.ylabel(label)
    plt.xlabel('time')
    plt.plot(signal)
plt.show()