Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

実装にeigenを使用 #2

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[submodule "eigen"]
path = eigen
url = https://gitlab.com/libeigen/eigen.git
[submodule "kissfft"]
path = kissfft
url = https://github.com/mborgerding/kissfft.git
1 change: 1 addition & 0 deletions eigen
Submodule eigen added at d49ede
1 change: 1 addition & 0 deletions kissfft
Submodule kissfft added at 8f47a6
197 changes: 96 additions & 101 deletions librosapp.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
#pragma once

#include <cmath>
#include <complex>
#include <string>
#include <vector>

#include "eigen/Eigen/Core"
#include "eigen/Eigen/Dense"
#include "eigen/unsupported/Eigen/FFT"
#include "kiss_fft.h"

using Eigen::Dynamic;
using Eigen::Matrix;
using Eigen::MatrixXd;
using Eigen::VectorXd;

using std::complex;
using std::string;
using std::vector;

Expand Down Expand Up @@ -47,54 +57,69 @@ vector<vector<kiss_fft_cpx>> transpose(vector<vector<kiss_fft_cpx>> v) {
return res;
}

vector<vector<kiss_fft_cpx>> stft(stft_arg* arg) {
if (arg->hop_length <= 0) {
arg->hop_length = arg->n_fft / 4;
vector<vector<std::complex<float>>> convert_complex_matrix(
Matrix<complex<float>, Dynamic, Dynamic> m) {
auto res_vec = vector<vector<complex<float>>>(m.rows());
for (int i = 0; i < m.rows(); i++) {
for (int j = 0; j < m.cols(); j++) {
std::complex<float> a(m(i, j).real(), m(i, j).imag());
res_vec[i].push_back(a);
}
}
return res_vec;
}

vector<vector<float>> convert_matrix(Matrix<float, Dynamic, Dynamic> m) {
auto res_vec = vector<vector<float>>(m.rows());
for (int i = 0; i < m.rows(); i++) {
for (int j = 0; j < m.cols(); j++) {
res_vec[i].push_back(m(i, j));
}
}
return res_vec;
}

Matrix<complex<float>, Dynamic, Dynamic> stft(stft_arg* arg) {
auto padded_len = arg->y.size() + arg->n_fft / 2 * 2;
auto cx_in = vector<kiss_fft_cpx>(padded_len);
for (int i = 0; i < cx_in.size(); i++) {
cx_in[i] = kiss_fft_cpx();
auto input = Matrix<float, 1, Dynamic>(padded_len);
for (int i = 0; i < padded_len; i++) {
if (i < arg->n_fft / 2 || i >= arg->y.size() + arg->n_fft / 2) {
cx_in[i].r = 0.0f;
cx_in[i].i = 0.0f;
input(i) = 0.0;
} else {
cx_in[i].r = arg->y[i - arg->n_fft / 2];
cx_in[i].i = 0.0f;
input(i) = arg->y[i - arg->n_fft / 2];
}
}

auto hanning = vector<float>(arg->n_fft);
auto hanning = Matrix<float, 1, Dynamic>(1, arg->n_fft);
for (int i = 0; i < arg->n_fft; i++) {
auto n = 1 - arg->n_fft + 2 * i;
hanning[i] =
hanning(i) =
0.5 + 0.5 * cosf(LIBROSA_PI * float(n) / float(arg->n_fft - 1));
}

kiss_fft_cfg cfg = kiss_fft_alloc(arg->n_fft, false, 0, 0);

auto cols = (cx_in.size() - arg->n_fft) / arg->hop_length + 1;
auto result = vector<vector<kiss_fft_cpx>>(
cols, vector<kiss_fft_cpx>(arg->n_fft / 2 + 1));
auto fft_input = vector<kiss_fft_cpx>(arg->n_fft);
auto fft_result = vector<kiss_fft_cpx>(arg->n_fft);
auto cols = (padded_len - arg->n_fft) / arg->hop_length + 1;
auto result =
Matrix<complex<float>, Dynamic, Dynamic>(cols, arg->n_fft / 2 + 1);
auto fft_input = vector<float>(arg->n_fft);
auto fft_result = vector<complex<float>>(arg->n_fft);
for (int col = 0; col < cols; col++) {
auto start = col * arg->hop_length;
auto fft_input_m =
input(0, Eigen::seqN(start, arg->n_fft)).cwiseProduct(hanning);

for (int i = 0; i < arg->n_fft; i++) {
fft_input[i].r = cx_in[start + i].r * hanning[i];
fft_input[i].i = 0.0f;
fft_input[i] = fft_input_m(0, i);
}
kiss_fft(cfg, fft_input.data(), fft_result.data());

Eigen::FFT<float> fft;
fft.fwd(fft_result, fft_input);

for (int j = 0; j < arg->n_fft / 2 + 1; j++) {
result[col][j].r = fft_result[j].r;
result[col][j].i = fft_result[j].i;
result(col, j) = fft_result[j];
}
}
auto res = librosa::transpose(result);

kiss_fft_free(cfg);
return res;
return result.transpose();
}

namespace core {
Expand Down Expand Up @@ -128,29 +153,27 @@ vector<float> hz_to_mel(vector<float> freqs, bool htk = false) {
return mels;
}

vector<float> mel_to_hz(vector<float> mels, bool htk = false) {
vector<float> freqs(mels.size());
VectorXd mel_to_hz(VectorXd mels, bool htk = false) {
VectorXd freqs(mels.size());
if (htk) {
for (int i = 0; i < mels.size(); i++) {
freqs[i] = 700.0 * (powf(10.0, mels[i] / 2595.0) - 1.0);
freqs(i) = 700.0 * (powf(10.0, mels(i) / 2595.0) - 1.0);
}
return freqs;
}

float f_min = 0.0;
float f_sp = 200.0 / 3;

for (int i = 0; i < mels.size(); i++) {
freqs[i] = f_min + f_sp * mels[i];
}
freqs = f_sp * mels + VectorXd::Constant(mels.size(), f_min);

float min_log_hz = 1000.0;
float min_log_mel = (min_log_hz - f_min) / f_sp;
float logstep = logf(6.4) / 27.0;

for (int i = 0; i < mels.size(); i++) {
if (mels[i] >= min_log_mel) {
freqs[i] = min_log_hz * expf(logstep * (mels[i] - min_log_mel));
if (mels(i) >= min_log_mel) {
freqs(i) = min_log_hz * expf(logstep * (mels(i) - min_log_mel));
}
}

Expand All @@ -171,17 +194,13 @@ struct mel_frequencies_arg {
}
};

vector<float> mel_frequencies(mel_frequencies_arg* arg) {
VectorXd mel_frequencies(mel_frequencies_arg* arg) {
auto fmin_v = vector<float>(1, arg->fmin);
auto fmax_v = vector<float>(1, arg->fmax);
float min_mel = hz_to_mel(fmin_v, arg->htk)[0];
float max_mel = hz_to_mel(fmax_v, arg->htk)[0];

auto step = (max_mel - min_mel) / float(arg->n_mels - 1);
vector<float> mels = vector<float>(arg->n_mels);
for (int i = 0; i < arg->n_mels; i++) {
mels[i] = min_mel + step * float(i);
}
auto mels = VectorXd::LinSpaced(arg->n_mels, min_mel, max_mel);

return mel_to_hz(mels, arg->htk);
}
Expand All @@ -206,28 +225,19 @@ struct _spectrogram_arg {
}
};

vector<vector<float>> _spectrogram(_spectrogram_arg* arg) {
Matrix<float, Dynamic, Dynamic> _spectrogram(_spectrogram_arg* arg) {
stft_arg s_arg;
s_arg.y = arg->y;
s_arg.n_fft = arg->n_fft;
s_arg.hop_length = arg->hop_length;

auto stft_result = stft(&s_arg);
if (stft_result.size() == 0) {
return vector<vector<float>>();
if (stft_result.rows() == 0) {
return Matrix<float, Dynamic, Dynamic>(0, 0);
}

auto spec = vector<vector<float>>(stft_result.size(),
vector<float>(stft_result[0].size()));
for (int i = 0; i < stft_result.size(); i++) {
for (int j = 0; j < stft_result[i].size(); j++) {
auto norm = sqrtf(powf(stft_result[i][j].i, 2.0) +
powf(stft_result[i][j].r, 2.0));

spec[i][j] = powf(norm, arg->power);
}
}
return spec;
auto a = stft_result.cwiseAbs();
return a.pow(arg->power);
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ここでreturn a;とするとビルドが通る
return a.pow(arg->power);とするとビルドが通らない(powerのところを2.0とかにしても同じく)

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a.unaryExpr([](double d) { return std::pow(d, arg->power); })で解決

}
} // namespace spectrum
} // namespace core
Expand All @@ -253,18 +263,16 @@ struct mel_arg {
}
};

vector<vector<float>> mel(mel_arg* arg) {
Matrix<float, Dynamic, Dynamic> mel(mel_arg* arg) {
int length = 1 + arg->n_fft / 2;
if (arg->fmax < 0.0) {
arg->fmax = float(arg->sr) / 2.0;
}

vector<vector<float>> weights(arg->n_mels, vector<float>(length));
Matrix<float, Dynamic, Dynamic> weights(arg->n_mels, length);

vector<float> fft_freqs(length);
for (int i = 0; i < length; i++) {
fft_freqs[i] = float(arg->sr) / float(arg->n_fft) * float(i);
}
auto fft_freqs = VectorXd::LinSpaced(length, 0.0, float(length - 1)) *
float(arg->sr) / float(arg->n_fft);

librosa::core::convert::mel_frequencies_arg mel_freq_arg;
mel_freq_arg.fmin = arg->fmin;
Expand All @@ -273,51 +281,39 @@ vector<vector<float>> mel(mel_arg* arg) {
mel_freq_arg.htk = arg->htk;
auto mel_f = librosa::core::convert::mel_frequencies(&mel_freq_arg);

vector<float> fdiff(mel_f.size() - 1);
VectorXd fdiff(mel_f.size() - 1);
for (int i = 0; i < fdiff.size(); i++) {
fdiff[i] = mel_f[i + 1] - mel_f[i];
fdiff(i) = mel_f(i + 1) - mel_f(i);
}

vector<vector<float>> ramps(mel_f.size(), vector<float>(fft_freqs.size()));
// TODO: 流石にどうにかできそう
Matrix<float, Dynamic, Dynamic> ramps(mel_f.size(), fft_freqs.size());
for (int i = 0; i < mel_f.size(); i++) {
for (int j = 0; j < fft_freqs.size(); j++) {
ramps[i][j] = mel_f[i] - fft_freqs[j];
ramps(i, j) = mel_f(i) - fft_freqs(j);
}
}

auto lower = vector<float>(fft_freqs.size());
auto upper = vector<float>(fft_freqs.size());
auto lower = VectorXd(fft_freqs.size());
auto upper = VectorXd(fft_freqs.size());
for (int i = 0; i < arg->n_mels; i++) {
for (int j = 0; j < lower.size(); j++) {
lower[j] = -1 * ramps[i][j] / fdiff[i];
}
auto lower = -1.0 * ramps(i, Eigen::placeholders::all) / fdiff(i);
// for (int j = 0; j < lower.size(); j++) {
// lower[j] = -1 * ramps[i][j] / fdiff[i];
// }

for (int j = 0; j < lower.size(); j++) {
upper[j] = ramps[i + 2][j] / fdiff[i + 1];
}
auto upper = ramps(i + 2, Eigen::placeholders::all) / fdiff(i + 1);
// for (int j = 0; j < lower.size(); j++) {
// upper[j] = ramps[i + 2][j] / fdiff[i + 1];
// }

// weights[i] = np.maximum(0, np.minimum(lower, upper));
for (int j = 0; j < lower.size(); j++) {
auto lower_upper_minimum = 0.0;
if (lower[j] > upper[j]) {
lower_upper_minimum = upper[j];
} else {
lower_upper_minimum = lower[j];
}

if (lower_upper_minimum > 0.0) {
weights[i][j] = lower_upper_minimum;
} else {
weights[i][j] = 0.0;
}
}
weights(i, Eigen::placeholders::all) = lower.cwiseMin(upper).cwiseMax(0.0);
}

for (int i = 0; i < arg->n_mels; i++) {
auto enorm = 2.0 / (mel_f[2 + i] - mel_f[i]);
for (int j = 0; j < length; j++) {
weights[i][j] = enorm * weights[i][j];
}
auto enorm = 2.0 / (mel_f(2 + i) - mel_f(i));
weights(i, Eigen::placeholders::all) *= enorm;
}

return weights;
Expand Down Expand Up @@ -349,7 +345,7 @@ struct melspectrogram_arg {
}
};

vector<vector<float>> melspectrogram(melspectrogram_arg* arg) {
Matrix<float, Dynamic, Dynamic> melspectrogram(melspectrogram_arg* arg) {
librosa::core::spectrum::_spectrogram_arg spec_arg;
spec_arg.y = arg->y;
spec_arg.n_fft = arg->n_fft;
Expand All @@ -358,8 +354,8 @@ vector<vector<float>> melspectrogram(melspectrogram_arg* arg) {
spec_arg.power = arg->power;

auto S = librosa::core::spectrum::_spectrogram(&spec_arg);
if (S.size() == 0) {
return vector<vector<float>>();
if (S.rows() == 0) {
return Matrix<float, Dynamic, Dynamic>(0, 0);
}

librosa::filters::mel_arg mel_arg;
Expand All @@ -371,15 +367,14 @@ vector<vector<float>> melspectrogram(melspectrogram_arg* arg) {
auto mel_basis = librosa::filters::mel(&mel_arg);

// return np.einsum("...ft,mf->...mt", S, mel_basis, optimize=True)
auto melspec =
vector<vector<float>>(mel_basis.size(), vector<float>(S[0].size()));
for (int m = 0; m < mel_basis.size(); m++) {
for (int t = 0; t < S[0].size(); t++) {
Matrix<float, Dynamic, Dynamic> melspec(mel_basis.rows(), S.cols());
for (int m = 0; m < mel_basis.rows(); m++) {
for (int t = 0; t < S.cols(); t++) {
float val = 0.0;
for (int f = 0; f < S.size(); f++) {
val += S[f][t] * mel_basis[m][f];
for (int f = 0; f < S.rows(); f++) {
val += S(f, t) * mel_basis(m, f);
}
melspec[m][t] = val;
melspec(m, t) = val;
}
}

Expand Down
Loading