From 2fbd0110e0ecead6643961cb263fe43e24fe619a Mon Sep 17 00:00:00 2001 From: Polochon-street Date: Mon, 27 Apr 2020 22:53:48 +0200 Subject: [PATCH] Add loudness score --- bliss-rs/examples/analyze.rs | 2 +- bliss-rs/src/analyze.rs | 11 +++++++ bliss-rs/src/lib.rs | 4 ++- bliss-rs/src/misc.rs | 61 ++++++++++++++++++++++++++++++++++++ bliss-rs/src/temporal.rs | 2 ++ bliss-rs/src/timbral.rs | 1 + 6 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 bliss-rs/src/misc.rs diff --git a/bliss-rs/examples/analyze.rs b/bliss-rs/examples/analyze.rs index a9bbf35..d2cdeef 100644 --- a/bliss-rs/examples/analyze.rs +++ b/bliss-rs/examples/analyze.rs @@ -11,7 +11,7 @@ fn main() { let args: Vec<String> = env::args().skip(1).collect(); for path in &args { match decode_and_analyze(&path) { - Ok(song) => println!("{:?}: {}", song.analysis.tempo, path), + Ok(song) => println!("{:?}: {}", song.analysis.loudness, path), Err(e) => println!("{}: {}", path, e), } } diff --git a/bliss-rs/src/analyze.rs b/bliss-rs/src/analyze.rs index 0a9e136..fc76592 100644 --- a/bliss-rs/src/analyze.rs +++ b/bliss-rs/src/analyze.rs @@ -11,6 +11,7 @@ use crate::timbral::{ ZeroCrossingRateDesc, }; use crate::decode::decode_song; +use crate::misc::LoudnessDesc; use crate::temporal::BPMDesc; use crate::{Analysis, Song}; @@ -26,6 +27,7 @@ pub fn analyze(song: &Song) -> Analysis { let mut spectral_desc = SpectralDesc::new(song.sample_rate); let mut zcr_desc = ZeroCrossingRateDesc::default(); let mut tempo_desc = BPMDesc::new(song.sample_rate); + let mut loudness_desc = LoudnessDesc::default(); for i in 1..song.sample_array.len() { if (i % SpectralDesc::HOP_SIZE) == 0 { @@ -40,6 +42,13 @@ pub fn analyze(song: &Song) -> Analysis { let end = i; tempo_desc.do_(&song.sample_array[beginning..end]); } + + // Contiguous windows, so WINDOW_SIZE here + if (i % LoudnessDesc::WINDOW_SIZE) == 0 { + let beginning = (i / LoudnessDesc::WINDOW_SIZE - 1) * LoudnessDesc::WINDOW_SIZE; + let end = i; 
loudness_desc.do_(&song.sample_array[beginning..end]); + } } Analysis { @@ -48,6 +57,7 @@ pub fn analyze(song: &Song) -> Analysis { zero_crossing_rate: zcr_desc.get_value(), spectral_rolloff: spectral_desc.get_rolloff(), spectral_flatness: spectral_desc.get_flatness(), + loudness: loudness_desc.get_value(), } } @@ -65,6 +75,7 @@ mod tests { zero_crossing_rate: 0.075, spectral_rolloff: 2026.76, spectral_flatness: 0.11, + loudness: -32.79, }; assert!(expected_analysis.approx_eq(&analyze(&song))); } diff --git a/bliss-rs/src/lib.rs b/bliss-rs/src/lib.rs index a08a329..8381bd6 100644 --- a/bliss-rs/src/lib.rs +++ b/bliss-rs/src/lib.rs @@ -1,8 +1,9 @@ // temporarily pub pub mod analyze; pub mod decode; -pub mod timbral; +pub mod misc; pub mod temporal; +pub mod timbral; pub mod utils; pub const CHANNELS: u16 = 1; @@ -28,6 +29,7 @@ pub struct Analysis { pub zero_crossing_rate: f32, pub spectral_rolloff: f32, pub spectral_flatness: f32, + pub loudness: f32, } impl Analysis { diff --git a/bliss-rs/src/misc.rs b/bliss-rs/src/misc.rs new file mode 100644 index 0000000..549c5aa --- /dev/null +++ b/bliss-rs/src/misc.rs @@ -0,0 +1,61 @@ +//! Miscellaneous feature extraction module. +//! +//! Contains various descriptors that don't fit in one of the +//! existing categories. +#[cfg(feature = "aubio-lib")] +extern crate aubio_lib; + +use aubio_rs::level_lin; + +use super::utils::mean; + +/** + * Loudness (in dB) detection object. + * + * It indicates how "loud" a recording of a song is. For a given audio signal, + * this value increases if the amplitude of the signal, and nothing else, is + * increased. + * + * Of course, this makes this result dependent on the recording, meaning + * the same song would yield different loudness on different recordings. Which + * is exactly what we want, given that this is not a music theory project, but + * one that aims at giving the best real-life results. + * + * Ranges between -90 dB (~silence) and ~50 dB. 
+ * + * (This is technically the sound pressure level of the track, but loudness is + * way more visual) + */ +#[derive(Default)] +pub struct LoudnessDesc { + pub values: Vec<f32>, +} + +impl LoudnessDesc { + pub const WINDOW_SIZE: usize = 1024; + + pub fn do_(&mut self, chunk: &[f32]) { + let level = level_lin(chunk); + self.values.push(level); + } + + pub fn get_value(&mut self) -> f32 { + 10.0 * (mean(&self.values)).log10() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::decode::decode_song; + + #[test] + fn test_loudness() { + let song = decode_song("data/s16_mono_22_5kHz.flac").unwrap(); + let mut loudness_desc = LoudnessDesc::default(); + for chunk in song.sample_array.chunks_exact(LoudnessDesc::WINDOW_SIZE) { + loudness_desc.do_(&chunk); + } + assert!(0.01 > (-32.7931 - loudness_desc.get_value()).abs()); + } +} diff --git a/bliss-rs/src/temporal.rs b/bliss-rs/src/temporal.rs index a7f3c22..43b32ea 100644 --- a/bliss-rs/src/temporal.rs +++ b/bliss-rs/src/temporal.rs @@ -19,6 +19,8 @@ use aubio_rs::{OnsetMode, Tempo}; * onset detection; it proved to be the best for finding out the BPM of a panel * of songs I had, but it could very well be replaced by something better in the * future. + * + * Ranges from 0 (theoretically...) to 200 BPM. * * (Also, if someone knows a way in aubio to get the correct value of 200 BPM * for "Through the Fire and Flames", please chip in) diff --git a/bliss-rs/src/timbral.rs b/bliss-rs/src/timbral.rs index 308cc08..2f95a6c 100644 --- a/bliss-rs/src/timbral.rs +++ b/bliss-rs/src/timbral.rs @@ -194,6 +194,7 @@ impl ZeroCrossingRateDesc { #[cfg(test)] mod tests { use super::*; + // TODO change these tests to use a fixed vec (maybe?) use crate::decode::decode_song; #[test]