Skip to content

Commit

Permalink
Add silenceThreshold parameter to MFCC (#543)
Browse files Browse the repository at this point in the history
Add a silenceThreshold parameter that defaults to 1e-9.

Get rid of the pointer to the compressor function, because it stops being
flexible as soon as one needs to call functions with a different number of
parameters (amp2db vs linear) or with different log threshold values (the
amp2db dB threshold vs the log threshold).
  • Loading branch information
dbogdanov committed Dec 28, 2016
1 parent 060ee72 commit 9a35819
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 28 deletions.
44 changes: 21 additions & 23 deletions src/algorithms/spectral/mfcc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const char* MFCC::name = "MFCC";
const char* MFCC::category = "Spectral";
const char* MFCC::description = DOC("This algorithm computes the mel-frequency cepstrum coefficients of a spectrum. As there is no standard implementation, the MFCC-FB40 is used by default:\n"
" - filterbank of 40 bands from 0 to 11000Hz\n"
" - take the log value of the spectrum energy in each mel band\n"
" - take the log value of the spectrum energy in each mel band. Bands energy values below silence threshold will be clipped to its value before computing log-energies\n"
" - DCT of the 40 bands down to 13 mel coefficients\n"
"There is a paper describing various MFCC implementations [1].\n"
"\n"
Expand Down Expand Up @@ -93,10 +93,13 @@ void MFCC::configure() {
INHERIT("liftering"));
_logbands.resize(parameter("numberBands").toInt());

setCompressor(parameter("logType").toString());

_logType = parameter("logType").toLower();
_silenceThreshold = parameter("silenceThreshold").toReal();
_dbSilenceThreshold = 10 * log10(_silenceThreshold);
_logSilenceThreshold = log(_silenceThreshold);
}


void MFCC::compute() {

// get the inputs and outputs
Expand All @@ -111,30 +114,25 @@ void MFCC::compute() {

// take the dB amplitude of the spectrum
for (int i=0; i<int(bands.size()); ++i) {
_logbands[i] = (*_compressor)(bands[i]);
if (_logType == "dbpow") {
_logbands[i] = pow2db(bands[i], _silenceThreshold, _dbSilenceThreshold);
}
else if (_logType == "dbamp") {
_logbands[i] = amp2db(bands[i], _silenceThreshold, _dbSilenceThreshold);
}
else if (_logType == "log") {
_logbands[i] = lin2log(bands[i], _silenceThreshold, _logSilenceThreshold);
}
else if (_logType == "natural") {
_logbands[i] = bands[i];
}
else {
throw EssentiaException("MFCC: Bad 'logType' parameter");
}
}

// compute the DCT of these bands
_dct->input("array").set(_logbands);
_dct->output("dct").set(mfcc);
_dct->compute();
}

// Selects the compression function stored in _compressor according to the
// requested 'logType' value.
//
// Recognised values and the function each one selects:
//   "natural" -> linear
//   "dbpow"   -> pow2db
//   "dbamp"   -> amp2db
//   "log"     -> log
//
// Throws EssentiaException for any other value.
void MFCC::setCompressor(std::string logType){
  if (logType == "natural") {
    _compressor = linear;
    return;
  }

  if (logType == "dbpow") {
    _compressor = pow2db;
    return;
  }

  if (logType == "dbamp") {
    _compressor = amp2db;
    return;
  }

  if (logType == "log") {
    _compressor = log;
    return;
  }

  // None of the supported compression types matched.
  throw EssentiaException("MFCC: Bad 'logType' parameter");
}
10 changes: 5 additions & 5 deletions src/algorithms/spectral/mfcc.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,10 @@ class MFCC : public Algorithm {
Algorithm* _dct;

std::vector<Real> _logbands;

typedef Real (*funcPointer)(Real);
funcPointer _compressor;

void setCompressor(std::string logType);
std::string _logType;
Real _silenceThreshold;
Real _dbSilenceThreshold;
Real _logSilenceThreshold;

public:
MFCC() {
Expand Down Expand Up @@ -68,6 +67,7 @@ class MFCC : public Algorithm {
declareParameter("weighting", "type of weighting function for determining triangle area","{warping,linear}","warping");
declareParameter("normalize", "'unit_max' makes the vertex of all the triangles equal to 1, 'unit_sum' makes the area of all the triangles equal to 1","{unit_sum,unit_max}", "unit_sum");
declareParameter("type", "use magnitude or power spectrum","{magnitude,power}", "power");
declareParameter("silenceThreshold", "silence threshold for computing log-energy bands", "(0,inf)", 1e-9);
declareParameter("dctType", "the DCT type", "[2,3]", 2);
declareParameter("liftering", "the liftering coefficient. Use '0' to bypass it", "[0,inf)", 0);
declareParameter("logType","logarithmic compression type. Use 'dbpow' if working with power and 'dbamp' if working with magnitudes","{natural,dbpow,dbamp,log}","dbamp");
Expand Down

0 comments on commit 9a35819

Please sign in to comment.