Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adapt spl, rms & energy to work with Signal and FramedSignal #208

Merged
merged 2 commits into from
Feb 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ API relevant changes:

Other changes:

* SPL and RMS can be computed on `Signal` and `FramedSignal` (#208)
* `num_threads` is passed to `ParallelProcessor` in single mode (#217)
* Use `install_requires` in `setup.py` to specify dependencies (#226)
* Use new Cython build system to build extensions (#227)
Expand Down
141 changes: 95 additions & 46 deletions madmom/audio/signal.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def remix(signal, num_channels):

def resample(signal, sample_rate, **kwargs):
"""
Resample the signal (by calling ffmeg).
Resample the signal.

Parameters
----------
Expand All @@ -234,8 +234,7 @@ def resample(signal, sample_rate, **kwargs):

Notes
-----
This function saves the given signal as a temporary file and reloads it
with the desired sample rate.
This function uses ``ffmpeg`` to resample the signal.

"""
from .ffmpeg import load_ffmpeg_file
Expand Down Expand Up @@ -323,10 +322,9 @@ def trim(signal, where='fb'):
return signal[first:last]


def root_mean_square(signal):
def energy(signal):
    """
    Compute the energy of a (framed) signal.

    Parameters
    ----------
    signal : numpy array or FramedSignal
        Signal (or frames of a signal).

    Returns
    -------
    energy : float or numpy array
        Energy of the signal, i.e. the sum of the squared (absolute) values
        of all its elements; one value per frame for a `FramedSignal`.

    Raises
    ------
    TypeError
        If `signal` is neither a `FramedSignal` nor a numpy array.

    Notes
    -----
    If `signal` is a `FramedSignal`, the energy is computed for each frame
    individually.

    """
    # compute the energy for every frame of the signal
    if isinstance(signal, FramedSignal):
        return np.array([energy(frame) for frame in signal])
    # make sure the signal is a numpy array
    if not isinstance(signal, np.ndarray):
        raise TypeError("Invalid type for signal, must be a numpy array.")
    # take the abs if the signal is complex
    if np.iscomplex(signal).any():
        signal = np.abs(signal)
    # Note: type conversion needed because of integer overflows; the builtin
    #       `float` is used since the `np.float` alias no longer exists in
    #       current NumPy releases
    if signal.dtype != float:
        signal = signal.astype(float)
    # return energy (sum of squares over all samples and channels)
    samples = signal.flatten()
    return np.dot(samples, samples)


def root_mean_square(signal):
    """
    Compute the root mean square of a (framed) signal. This can be used as a
    measurement of power.

    Parameters
    ----------
    signal : numpy array or FramedSignal
        Signal (or frames of a signal).

    Returns
    -------
    rms : float or numpy array
        Root mean square of the signal; one value per frame if `signal` is a
        `FramedSignal`.

    Notes
    -----
    If `signal` is a `FramedSignal`, the root mean square is computed for each
    frame individually.

    """
    # framed input: recurse into the individual frames
    if isinstance(signal, FramedSignal):
        per_frame = [root_mean_square(frame) for frame in signal]
        return np.array(per_frame)
    # mean of the squared values (energy divided by the number of elements)
    mean_square = energy(signal) / signal.size
    return np.sqrt(mean_square)


def sound_pressure_level(signal, p_ref=None):
    """
    Compute the sound pressure level of a (framed) signal.

    Parameters
    ----------
    signal : numpy array or FramedSignal
        Signal (or frames of a signal).
    p_ref : float, optional
        Reference value. If 'None', the maximum value of the signal's dtype
        is used for integer signals and 1.0 for all other signals.

    Returns
    -------
    spl : float or numpy array
        Sound pressure level of the signal [dB]; one value per frame if
        `signal` is a `FramedSignal`.

    Notes
    -----
    The sound pressure level is computed as ``20 * log10(rms / p_ref)``,
    i.e. the root mean square of the signal relative to a reference value,
    measured in decibels (dB).

    A signal with a root mean square of 0 yields the most negative finite
    float instead of ``-inf``.

    If `signal` is a `FramedSignal`, the sound pressure level is computed for
    each frame individually.

    """
    # compute the sound pressure level for every frame of the signal
    # Note: `p_ref` must be handed down, otherwise a user-supplied reference
    #       value would be silently ignored for framed signals
    if isinstance(signal, FramedSignal):
        return np.array([sound_pressure_level(frame, p_ref)
                         for frame in signal])
    # compute the RMS
    rms = root_mean_square(signal)
    # find a reasonable default reference value if None is given
    if p_ref is None:
        if np.issubdtype(signal.dtype, np.integer):
            p_ref = float(np.iinfo(signal.dtype).max)
        else:
            p_ref = 1.0
    # normal SPL computation. ignore warnings when taking the log of 0,
    # then replace the resulting -inf values with the smallest finite number
    with np.errstate(divide='ignore'):
        return np.nan_to_num(20.0 * np.log10(rms / p_ref))


# functions to load / write audio files
Expand Down Expand Up @@ -771,6 +810,22 @@ def write(self, filename):
"""
return write_wave_file(self, filename)

def energy(self):
    """Return the energy of the signal (see the `energy` function)."""
    # delegate to the module-level function of the same name
    signal_energy = energy(self)
    return signal_energy

def root_mean_square(self):
    """Return the root mean square of the signal."""
    # delegate to the module-level function of the same name
    signal_rms = root_mean_square(self)
    return signal_rms

# short alias for `root_mean_square`
rms = root_mean_square

def sound_pressure_level(self, p_ref=None):
    """
    Return the sound pressure level of the signal.

    Parameters
    ----------
    p_ref : float, optional
        Reference value, passed on unchanged to the module-level
        `sound_pressure_level` function (which chooses a default if 'None').

    Returns
    -------
    spl : float
        Sound pressure level of the signal.

    """
    # delegate to the module-level function of the same name
    return sound_pressure_level(self, p_ref)

# short alias for `sound_pressure_level`
spl = sound_pressure_level


class SignalProcessor(Processor):
"""
Expand Down Expand Up @@ -1005,28 +1060,6 @@ def signal_frame(signal, index, frame_size, hop_size, origin=0):
return signal[start:stop]


def total_energy(frames):
    """
    Compute the total energy (sum of squared values) for each frame of a
    FramedSignal.

    Parameters
    ----------
    frames : FramedSignal
        Frames (i.e. FramedSignal).

    Returns
    -------
    te : numpy array
        Total energy per frame.

    Raises
    ------
    TypeError
        If `frames` is not a `FramedSignal`.

    """
    if isinstance(frames, FramedSignal):
        # the builtin sum() adds up the squared values of each frame
        squared_sums = [sum(frame ** 2) for frame in frames]
        return np.array(squared_sums)
    # anything but a FramedSignal is rejected
    raise TypeError("Invalid type for input, must be a FramedSignal.")


FRAME_SIZE = 2048
HOP_SIZE = 441.
FPS = None
Expand Down Expand Up @@ -1294,6 +1327,22 @@ def ndim(self):
"""Dimensionality of the FramedSignal."""
return len(self.shape)

def energy(self):
    """Return the energy of the individual frames."""
    # delegate to the module-level function of the same name
    frame_energies = energy(self)
    return frame_energies

def root_mean_square(self):
    """Return the root mean square of the individual frames."""
    # delegate to the module-level function of the same name
    frame_rms = root_mean_square(self)
    return frame_rms

# short alias for `root_mean_square`
rms = root_mean_square

def sound_pressure_level(self, p_ref=None):
    """
    Return the sound pressure level of the individual frames.

    Parameters
    ----------
    p_ref : float, optional
        Reference value, passed on unchanged to the module-level
        `sound_pressure_level` function (which chooses a default if 'None').

    Returns
    -------
    spl : numpy array
        Sound pressure level of the individual frames.

    """
    # delegate to the module-level function of the same name
    return sound_pressure_level(self, p_ref)

# short alias for `sound_pressure_level`
spl = sound_pressure_level


class FramedSignalProcessor(Processor):
"""
Expand Down
4 changes: 2 additions & 2 deletions madmom/audio/spectrogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -1514,7 +1514,7 @@ def __init__(self, signal, fps=50., fmin=27.5, fmax=4200.):
def __new__(cls, signal, fps=50., fmin=27.5, fmax=4200.):
from scipy.signal import filtfilt
from .filters import SemitoneBandpassFilterbank
from .signal import FramedSignal, Signal, total_energy, resample
from .signal import FramedSignal, Signal, energy, resample
# check if we got a mono Signal
if not isinstance(signal, Signal) or signal.num_channels != 1:
signal = Signal(signal, num_channels=1)
Expand Down Expand Up @@ -1548,7 +1548,7 @@ def __new__(cls, signal, fps=50., fmin=27.5, fmax=4200.):
# compute total energy of the frames
# Note: the energy of the signal is computed with respect to the
# reference sampling rate as in the MATLAB chroma toolbox
bands.append(total_energy(frames) / band_sample_rate * 22050.)
bands.append(energy(frames) / band_sample_rate * 22050.)
# cast as SemitoneBandpassSpectrogram
obj = np.vstack(bands).T.view(cls)
# save additional attributes
Expand Down
102 changes: 102 additions & 0 deletions tests/test_audio_signal.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,43 @@ def test_values(self):
self.assertTrue(np.allclose(result[:, 1], np.arange(1, 5)))


class TestEnergyFunction(unittest.TestCase):
    # tests for the module-level `energy` function

    def test_types(self):
        # mono signals
        result = energy(sig_1d)
        self.assertIsInstance(result, float)
        # multi-channel signals
        result = energy(sig_2d)
        self.assertIsInstance(result, float)

    def test_values(self):
        # mono signals
        result = energy(sig_1d)
        self.assertTrue(np.allclose(result, 3))
        result = energy(np.zeros(100))
        self.assertTrue(np.allclose(result, 0))
        # multi-channel signals
        result = energy(sig_2d)
        self.assertTrue(np.allclose(result, 8))
        result = energy(np.zeros(100).reshape(-1, 2))
        self.assertTrue(np.allclose(result, 0))

    def test_frames(self):
        # mono signals
        frames = FramedSignal(sig_1d, frame_size=4, hop_size=2)
        result = energy(frames)
        self.assertTrue(np.allclose(result, [0, 1, 2, 1, 1]))
        # silent mono signal; it must be framed to exercise the per-frame
        # code path (the unframed case is covered by `test_values`)
        frames = FramedSignal(np.zeros(100), frame_size=4, hop_size=2)
        result = energy(frames)
        # guard against a vacuous pass of `allclose` on an empty result
        self.assertEqual(len(result), len(frames))
        self.assertTrue(np.allclose(result, 0))
        # multi-channel signals
        frames = FramedSignal(sig_2d, frame_size=4, hop_size=2)
        result = energy(frames)
        self.assertTrue(np.allclose(result, [1, 3, 4, 3, 3]))
        # silent multi-channel signal, framed
        frames = FramedSignal(np.zeros(100).reshape(-1, 2), frame_size=4,
                              hop_size=2)
        result = energy(frames)
        self.assertEqual(len(result), len(frames))
        self.assertTrue(np.allclose(result, 0))


class TestRootMeanSquareFunction(unittest.TestCase):

def test_types(self):
Expand All @@ -564,6 +601,22 @@ def test_values(self):
result = root_mean_square(np.zeros(100).reshape(-1, 2))
self.assertTrue(np.allclose(result, 0))

def test_frames(self):
    # mono signals
    frames = FramedSignal(sig_1d, frame_size=4, hop_size=2)
    result = root_mean_square(frames)
    self.assertTrue(np.allclose(result, [0, 0.5, 0.70710678, 0.5, 0.5]))
    # silent mono signal; it must be framed to exercise the per-frame
    # code path (the unframed case is covered by `test_values`)
    frames = FramedSignal(np.zeros(100), frame_size=4, hop_size=2)
    result = root_mean_square(frames)
    # guard against a vacuous pass of `allclose` on an empty result
    self.assertEqual(len(result), len(frames))
    self.assertTrue(np.allclose(result, 0))
    # multi-channel signals
    frames = FramedSignal(sig_2d, frame_size=4, hop_size=2)
    result = root_mean_square(frames)
    self.assertTrue(np.allclose(result, [0.35355339, 0.61237244,
                                         0.70710678, 0.61237244,
                                         0.61237244]))
    # silent multi-channel signal, framed
    frames = FramedSignal(np.zeros(100).reshape(-1, 2), frame_size=4,
                          hop_size=2)
    result = root_mean_square(frames)
    self.assertEqual(len(result), len(frames))
    self.assertTrue(np.allclose(result, 0))


class TestSoundPressureLevelFunction(unittest.TestCase):

Expand Down Expand Up @@ -606,6 +659,23 @@ def test_values(self):
result = sound_pressure_level(sig)
self.assertTrue(np.allclose(result, 0.))

def test_frames(self):
    # a frame without any energy yields the most negative finite float
    silence = -np.finfo(float).max
    # mono signals
    frames = FramedSignal(sig_1d, frame_size=4, hop_size=2)
    result = sound_pressure_level(frames)
    self.assertTrue(np.allclose(result, [silence, -6.0206, -3.0103,
                                         -6.0206, -6.0206]))
    # silent mono signal; it must be framed to exercise the per-frame
    # code path
    frames = FramedSignal(np.zeros(100), frame_size=4, hop_size=2)
    result = sound_pressure_level(frames)
    # guard against a vacuous pass of `allclose` on an empty result
    self.assertEqual(len(result), len(frames))
    self.assertTrue(np.allclose(result, silence))
    # multi-channel signals
    frames = FramedSignal(sig_2d, frame_size=4, hop_size=2)
    result = sound_pressure_level(frames)
    self.assertTrue(np.allclose(result, [-9.03089987, -4.25968732,
                                         -3.01029996, -4.25968732,
                                         -4.25968732]))
    # silent multi-channel signal, framed
    frames = FramedSignal(np.zeros(100).reshape(-1, 2), frame_size=4,
                          hop_size=2)
    result = sound_pressure_level(frames)
    self.assertEqual(len(result), len(frames))
    self.assertTrue(np.allclose(result, silence))


class TestLoadWaveFileFunction(unittest.TestCase):

Expand Down Expand Up @@ -937,6 +1007,19 @@ def test_write_method(self):
result = Signal(tmp_file)
self.assertTrue(np.allclose(orig, result))

def test_methods(self):
    # mono signal, accessed through the short aliases
    mono = Signal(sig_1d)
    mono_checks = ((mono.energy, 3),
                   (mono.rms, 0.57735026919),
                   (mono.spl, -4.7712125472))
    for method, expected in mono_checks:
        self.assertTrue(np.allclose(method(), expected))
    # multi-channel signal, accessed through the long method names
    multi = Signal(sig_2d)
    multi_checks = ((multi.energy, 8),
                    (multi.root_mean_square, 2. / 3),
                    (multi.sound_pressure_level, -3.52182518111))
    for method, expected in multi_checks:
        self.assertTrue(np.allclose(method(), expected))


class TestSignalProcessorClass(unittest.TestCase):

Expand Down Expand Up @@ -1388,6 +1471,25 @@ def test_values_file_fps(self):
self.assertTrue(result.frame_size == 2048)
self.assertTrue(result.hop_size == 882.)

def test_methods(self):
    # mono frames, accessed through the short aliases
    mono = FramedSignal(sig_1d, frame_size=4, hop_size=2)
    mono_checks = (
        (mono.energy, [0, 1, 2, 1, 1]),
        (mono.rms, [0, 0.5, 0.70710678, 0.5, 0.5]),
        (mono.spl, [-np.finfo(float).max, -6.0206, -3.0103, -6.0206,
                    -6.0206]),
    )
    for method, expected in mono_checks:
        self.assertTrue(np.allclose(method(), expected))
    # multi-channel frames, accessed through the long method names
    multi = FramedSignal(sig_2d, frame_size=4, hop_size=2)
    multi_checks = (
        (multi.energy, [1, 3, 4, 3, 3]),
        (multi.root_mean_square, [0.35355339, 0.61237244, 0.70710678,
                                  0.61237244, 0.61237244]),
        (multi.sound_pressure_level, [-9.03089987, -4.25968732,
                                      -3.01029996, -4.25968732,
                                      -4.25968732]),
    )
    for method, expected in multi_checks:
        self.assertTrue(np.allclose(method(), expected))


class TestFramedSignalProcessorClass(unittest.TestCase):

Expand Down