Skip to content

Commit

Permalink
Merge pull request #5 from YuzukiTsuru/Shine
Browse files Browse the repository at this point in the history
Shine
  • Loading branch information
YuzukiTsuru authored Jun 3, 2022
2 parents 214f453 + 610c763 commit eda0135
Show file tree
Hide file tree
Showing 26 changed files with 412 additions and 180 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/cmake-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: CMake macOS

on:
push:
branches: [ master ]
branches: "*"
pull_request:
branches: [ master ]
branches: "*"

env:
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/cmake-ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: CMake Ubuntu

on:
push:
branches: [ master ]
branches: "*"
pull_request:
branches: [ master ]
branches: "*"

env:
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/cmake-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: CMake Windows

on:
push:
branches: [ master ]
branches: "*"
pull_request:
branches: [ master ]
branches: "*"

env:
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
Expand Down
1 change: 1 addition & 0 deletions src/AudioModel/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Library Audio Model

add_subdirectory(WorldModule)
add_subdirectory(Synthesis)

file(GLOB audio_model_source *.cpp *.h)
add_library(AudioModel ${audio_model_source})
Expand Down
48 changes: 48 additions & 0 deletions src/AudioModel/Synthesis/Synthesis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,51 @@
//

#include "Synthesis.h"

#include <world/synthesis.h>

#include "Utils/LOG.h"
#include "world/synthesisrealtime.h"

/**
 * Construct a synthesizer for one audio model.
 *
 * Keeps a copy of the model object in the `audioModel` member so that
 * SynthesisWav() reads the parameters supplied by the caller, then allocates
 * the output sample buffer through AllocateMemory().
 *
 * @param audioModel  model whose fs / frame_period / fft_size / f0 /
 *                    spectrogram / aperiodicity fields SynthesisWav() reads
 * @param x_length    number of samples to allocate for the output buffer
 */
Synthesis::Synthesis(lessAudioModel audioModel, int x_length) : x_length(x_length), audioModel(audioModel) {
    YALL_DEBUG_ << "Allocate Memory for output wav, length: " + std::to_string(x_length);
    // x_length is initialised above, so the allocation sees the requested size.
    AllocateMemory();
}

/**
 * Allocate the output sample buffer `x` with `x_length` elements.
 *
 * The buffer is zero-filled so that any samples SynthesisWav() does not
 * overwrite are silence rather than indeterminate values. A buffer left over
 * from an earlier call is released so that calling this twice does not leak.
 */
void Synthesis::AllocateMemory() {
    // Allocate first: if new[] throws, the previous buffer is still intact.
    double *fresh = new double[x_length]();
    delete[] x;  // x starts as nullptr, so the first call is a no-op here
    x = fresh;
}

void Synthesis::SynthesisWav() const {
WorldSynthesizer synthesizer = {0};
int buffer_size = 64;
InitializeSynthesizer(audioModel.fs, audioModel.frame_period,
audioModel.fft_size, buffer_size, 100, &synthesizer);

int offset = 0;
int index = 0;
for (int i = 0; i < audioModel.f0_length;) {
// Add one frame ('i' shows the frame index that should be added)
if (AddParameters(&audioModel.f0[i], 1, &audioModel.spectrogram[i], &audioModel.aperiodicity[i], &synthesizer) == 1) {
++i;
}

// Synthesize speech with length of buffer_size sample.
// It is repeated until the function returns 0
// (it suggests that the synthesizer cannot generate speech).
while (Synthesis2(&synthesizer) != 0) {
index = offset * buffer_size;
for (int j = 0; j < buffer_size; ++j)
x[j + index] = synthesizer.buffer[j];
offset++;
}

// Check the "Lock" (Please see synthesisrealtime.h)
if (IsLocked(&synthesizer) == 1) {
YALL_WARN_ << "Synthesis Buffer Locked";
break;
}
}
DestroySynthesizer(&synthesizer);
}
24 changes: 24 additions & 0 deletions src/AudioModel/Synthesis/Synthesis.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,33 @@
#ifndef LESSAMPLER_SYNTHESIS_H
#define LESSAMPLER_SYNTHESIS_H

#include <iostream>

#include "AudioModel/lessAudioModel.h"

/**
 * Plain parameter bundle with public fields only.
 *
 * Every field has a zero / nullptr default so that a default-constructed
 * object is in a defined state (matching the in-class initialisers used by
 * lessAudioModel). The pointers are not allocated or released by this class.
 */
class SynthesisPara {
public:
    int fs = 0;                       // presumably the sampling rate in Hz (confirm with callers)
    int f0_length = 0;                // presumably the number of frames in f0 / spectrogram / aperiodicity
    double *f0 = nullptr;             // per-frame F0 values
    double **spectrogram = nullptr;   // per-frame spectrogram rows
    double **aperiodicity = nullptr;  // per-frame aperiodicity rows
};

/**
 * Owns an output sample buffer and an audio model to be rendered into it.
 *
 * The constructor records the requested length and allocates the buffer.
 * The synthesis step itself is currently a private helper.
 *
 * NOTE(review): `x` is allocated with new[] in AllocateMemory() and this
 * declaration has no destructor, so the buffer is never released. Copy
 * semantics need deciding before one is added.
 */
class Synthesis {
public:
    /// @param audioModel  model to synthesize from
    /// @param x_length    number of output samples to allocate
    explicit Synthesis(lessAudioModel audioModel, int x_length);

private:
    /// Allocates `x` with `x_length` elements.
    void AllocateMemory();

    /// Runs the WORLD real-time synthesizer over `audioModel`, writing into `x`.
    void SynthesisWav() const;

private:
    double *x = nullptr;          // output sample buffer
    int x_length = 0;             // element count of `x`

    lessAudioModel audioModel{};  // model read by SynthesisWav()
};


Expand Down
8 changes: 0 additions & 8 deletions src/AudioModel/lessAudioModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,4 @@ class lessAudioModel {
int fft_size = 0;
};

class TransAudioModel {
public:
int t_f0_length;
double *t_f0;
double **t_spectrogram;
double **t_aperiodicity;
};

#endif //LESSAMPLER_LESSAUDIOMODEL_H
109 changes: 38 additions & 71 deletions src/AudioProcess/AduioProcess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,80 +18,45 @@
//
#include <cmath>
#include <utility>
#include <cstring>

#include "Utils/exception.h"
#include "Utils/LOG.h"
#include "AudioProcess.h"
#include "libUTAU/PitchBendDecoder.h"

AduioProcess::AduioProcess(lessAudioModel audioModel, UTAUPara utauPara, UTAUFlags flags) : audioModel(audioModel), utauPara(std::move(utauPara)),
flags(flags) {
AudioProcess::AudioProcess(lessAudioModel audioModel, ShinePara shine) : audioModel(audioModel), shine(std::move(shine)) {
YALL_DEBUG_ << "Equalizing Picth...";
PicthEqualizing();
YALL_DEBUG_ << "Decode Pitch Bend...";
DecodePitchBend();
YALL_DEBUG_ << "Time Stretch...";
TimeStretch();
}

TransAudioModel AduioProcess::GetTransAudioModel() {
lessAudioModel AudioProcess::GetTransAudioModel() {
return transAudioModel;
}

void AduioProcess::PicthEqualizing() {
void AudioProcess::PicthEqualizing() {
auto freq_avg = GetAvgFreq();
YALL_DEBUG_ << "The average frequency is " + std::to_string(freq_avg);
if (freq_avg == 0.0) {
for (int i = 0; i < audioModel.f0_length; ++i) {
if (audioModel.f0[i] != 0.0) {
audioModel.f0[i] = utauPara.scaleNum;
audioModel.f0[i] = shine.scale_num;
} else {
audioModel.f0[i] = 0;
}
}
} else {
for (int i = 0; i < audioModel.f0_length; ++i) {
if (audioModel.f0[i] != 0.0) {
audioModel.f0[i] = ((audioModel.f0[i] - freq_avg) * utauPara.modulation / 100.0 + freq_avg) * (utauPara.scaleNum / freq_avg);
audioModel.f0[i] = ((audioModel.f0[i] - freq_avg) * shine.modulation / 100.0 + freq_avg) * (shine.scale_num / freq_avg);
} else {
audioModel.f0[i] = 0;
}
}
}
}

void AduioProcess::DecodePitchBend() {
if (utauPara.tempoNum == 0)
utauPara.tempoNum = 120;

if (utauPara.isCustomPitch) {
pitch_step = static_cast<int>(lround(60.0 / 96.0 / utauPara.tempoNum * audioModel.fs));
pitch_length = utauPara.output_samples / pitch_step + 1;

YALL_DEBUG_ << "The Pitch Length is: " + std::to_string(pitch_length);

PitchBendDecoder pitchBendDecoder(utauPara.pitch, pitch_length);

utauPara.pitch_bend = new int[pitch_length + 1];
for (int i = 0; i < pitch_length + 1; ++i) {
utauPara.pitch_bend[i] = 0;
}

std::memcpy(utauPara.pitch_bend, pitchBendDecoder.getPitchBend(), sizeof(int) * pitch_length);
} else {
utauPara.pitch_bend = new int[pitch_length + 1];
for (int i = 0; i < pitch_length + 1; ++i) {
utauPara.pitch_bend[i] = 0;
}
}

required_frame = static_cast<int>(1000.0 * utauPara.output_samples / audioModel.fs / audioModel.frame_period) + 1;
YALL_DEBUG_ << "The required frame is: " + std::to_string(required_frame);
transAudioModel.t_f0_length = required_frame;
}

double AduioProcess::GetAvgFreq() const {
double AudioProcess::GetAvgFreq() const {
double freq_avg = 0.0, timePercent, r, p[6], q, base_timePercent = 0;
for (int i = 0; i < audioModel.f0_length; ++i) {
timePercent = audioModel.f0[i];
Expand All @@ -114,25 +79,27 @@ double AduioProcess::GetAvgFreq() const {
return freq_avg;
}

void AduioProcess::TimeStretch() {
void AudioProcess::TimeStretch() {
YALL_DEBUG_ << "Allocate memory for target audio f0, sp, ap";

if (transAudioModel.t_f0_length == 0)
if (shine.required_frame == 0)
throw parameter_error("The target audio frame length is 0");

transAudioModel.t_f0 = new double[transAudioModel.t_f0_length];
for (int i = 0; i < transAudioModel.t_f0_length; ++i) {
transAudioModel.t_f0[i] = 0.0;
transAudioModel.f0_length = shine.required_frame;

transAudioModel.f0 = new double[transAudioModel.f0_length];
for (int i = 0; i < transAudioModel.f0_length; ++i) {
transAudioModel.f0[i] = 0.0;
}

transAudioModel.t_spectrogram = new double *[transAudioModel.t_f0_length];
transAudioModel.t_aperiodicity = new double *[transAudioModel.t_f0_length];
for (int i = 0; i < transAudioModel.t_f0_length; ++i) {
transAudioModel.t_spectrogram[i] = new double[audioModel.w_length];
transAudioModel.t_aperiodicity[i] = new double[audioModel.w_length];
transAudioModel.spectrogram = new double *[transAudioModel.f0_length];
transAudioModel.aperiodicity = new double *[transAudioModel.f0_length];
for (int i = 0; i < transAudioModel.f0_length; ++i) {
transAudioModel.spectrogram[i] = new double[audioModel.w_length];
transAudioModel.aperiodicity[i] = new double[audioModel.w_length];
for (int j = 0; j < audioModel.w_length; ++j) {
transAudioModel.t_spectrogram[i][j] = 0.0;
transAudioModel.t_aperiodicity[i][j] = 0.0;
transAudioModel.spectrogram[i][j] = 0.0;
transAudioModel.aperiodicity[i][j] = 0.0;
}
}

Expand All @@ -142,12 +109,12 @@ void AduioProcess::TimeStretch() {
double _sample_sp_trans_index, _sample_ap_trans_index, _out_sample_index, _in_sample_index;
int _sp_trans_index, _ap_trans_index;

for (int i = 0; i < transAudioModel.t_f0_length; ++i) {
for (int i = 0; i < transAudioModel.f0_length; ++i) {
_out_sample_index = audioModel.frame_period * i;
if (_out_sample_index < utauPara.base_length) {
_in_sample_index = utauPara.offset + _out_sample_index * utauPara.velocity;
if (_out_sample_index < shine.base_length) {
_in_sample_index = shine.offset + _out_sample_index * shine.velocity;
} else {
_in_sample_index = utauPara.offset + utauPara.firstHalfFixedPart + (_out_sample_index - utauPara.base_length) * utauPara.stretch_length;
_in_sample_index = shine.offset + shine.first_half_fixed_part + (_out_sample_index - shine.base_length) * shine.stretch_length;
}
YALL_DEBUG_ << "_in_sample_index -> " + std::to_string(_in_sample_index);
YALL_DEBUG_ << "_out_sample_index -> " + std::to_string(_out_sample_index);
Expand All @@ -173,34 +140,34 @@ void AduioProcess::TimeStretch() {
}
}

_sample_ap_trans_index = _out_sample_index * 0.001 * audioModel.fs / pitch_step;
_sample_ap_trans_index = _out_sample_index * 0.001 * audioModel.fs / shine.pitch_step;
_ap_trans_index = static_cast<int>(floor(_sample_ap_trans_index));
_sample_ap_trans_index -= _ap_trans_index;

if (_ap_trans_index >= pitch_length) {
_ap_trans_index = pitch_length - 1;
if (_ap_trans_index >= shine.pitch_length) {
_ap_trans_index = shine.pitch_length - 1;
_sample_sp_trans_index = 0.0;
}

YALL_DEBUG_ << "_ap_trans_index -> " + std::to_string(_ap_trans_index);
YALL_DEBUG_ << "_sample_ap_trans_index -> " + std::to_string(_ap_trans_index + _sample_ap_trans_index);

YALL_DEBUG_ << "Apply Pitch Shift With Pitch Bend";
auto pitch_base = utauPara.scaleNum * pow(2, (utauPara.pitch_bend[_ap_trans_index] * (1.0 - _sample_ap_trans_index) +
utauPara.pitch_bend[_ap_trans_index + 1] * _sample_ap_trans_index) / 1200.0);
auto pitch_base = shine.scale_num * pow(2, (shine.pitch_bend[_ap_trans_index] * (1.0 - _sample_ap_trans_index) +
shine.pitch_bend[_ap_trans_index + 1] * _sample_ap_trans_index) / 1200.0);

YALL_DEBUG_ << "Trans F0 " + std::to_string(transAudioModel.t_f0[i]) + " Add " + std::to_string(pitch_base);
transAudioModel.t_f0[i] = pitch_base;
YALL_DEBUG_ << "Trans F0 " + std::to_string(transAudioModel.f0[i]) + " Add " + std::to_string(pitch_base);
transAudioModel.f0[i] = pitch_base;

transAudioModel.t_f0[i] = transAudioModel.t_f0[i] * pow(temp_f0 / avg_freq, utauPara.modulation * 0.01);
transAudioModel.f0[i] = transAudioModel.f0[i] * pow(temp_f0 / avg_freq, shine.modulation * 0.01);

YALL_DEBUG_ << "Trans SP ";
for (int j = 0; j < audioModel.w_length; ++j) {
if (_sp_trans_index < audioModel.f0_length - 1) {
transAudioModel.t_spectrogram[i][j] = audioModel.spectrogram[_sp_trans_index][j] * (1.0 - _sample_sp_trans_index) +
transAudioModel.spectrogram[i][j] = audioModel.spectrogram[_sp_trans_index][j] * (1.0 - _sample_sp_trans_index) +
audioModel.spectrogram[_sp_trans_index + 1][j] * _sample_sp_trans_index;
} else {
transAudioModel.t_spectrogram[i][j] = audioModel.spectrogram[audioModel.f0_length - 1][j];
transAudioModel.spectrogram[i][j] = audioModel.spectrogram[audioModel.f0_length - 1][j];
}
}

Expand All @@ -212,15 +179,15 @@ void AduioProcess::TimeStretch() {

for (int j = 0; j < audioModel.w_length; ++j) {
if (_ap_trans_index < audioModel.f0_length) {
transAudioModel.t_aperiodicity[i][j] = audioModel.aperiodicity[_ap_trans_index][j];
transAudioModel.aperiodicity[i][j] = audioModel.aperiodicity[_ap_trans_index][j];
} else {
transAudioModel.t_aperiodicity[i][j] = audioModel.aperiodicity[audioModel.f0_length - 1][j];
transAudioModel.aperiodicity[i][j] = audioModel.aperiodicity[audioModel.f0_length - 1][j];
}
}
}
}

void AduioProcess::interp1(const double *x, const double *y, int x_length, const double *xi, int xi_length, double *yi) {
void AudioProcess::interp1(const double *x, const double *y, int x_length, const double *xi, int xi_length, double *yi) {
auto *h = new double[x_length - 1];
int *k = new int[xi_length];

Expand All @@ -243,7 +210,7 @@ void AduioProcess::interp1(const double *x, const double *y, int x_length, const
delete[] h;
}

void AduioProcess::histc(const double *x, int x_length, const double *edges, int edges_length, int *index) {
void AudioProcess::histc(const double *x, int x_length, const double *edges, int edges_length, int *index) {
int count = 1;

int i = 0;
Expand Down
19 changes: 6 additions & 13 deletions src/AudioProcess/AudioProcess.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,22 @@
#define LESSAMPLER_AUDIOPROCESS_H

#include "AudioModel/lessAudioModel.h"
#include "libUTAU/libUTAU.h"
#include "Shine/ShinePara.h"

class AduioProcess {
class AudioProcess {
public:
AduioProcess(lessAudioModel audioModel, UTAUPara utauPara, UTAUFlags flags);
AudioProcess(lessAudioModel audioModel, ShinePara shine);

TransAudioModel GetTransAudioModel();
lessAudioModel GetTransAudioModel();

private:
lessAudioModel audioModel{};
TransAudioModel transAudioModel{};
UTAUPara utauPara{};
UTAUFlags flags;

int pitch_length = 0;
int pitch_step = 256;
int required_frame = 0;
lessAudioModel transAudioModel{};
ShinePara shine;

private:
void PicthEqualizing();

void DecodePitchBend();

[[nodiscard]] double GetAvgFreq() const;

void TimeStretch();
Expand Down
Loading

0 comments on commit eda0135

Please sign in to comment.