From 7d7df9acf3f4424d4c8a92f3f5477e8641ed0972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 15 Jul 2016 16:30:12 +0200 Subject: [PATCH] replace online parameter of FramedSignalProcessor by origin; fixes #168 --- CHANGES.rst | 1 + bin/ComplexFlux | 6 +-- bin/LogFiltSpecFlux | 6 +-- bin/OnsetDetectorLL | 5 +-- bin/SpectralOnsetDetection | 9 ++-- bin/SuperFlux | 6 +-- bin/SuperFluxNN | 1 - madmom/audio/signal.py | 92 +++++++++++++++++++++----------------- madmom/features/onsets.py | 4 +- madmom/processors.py | 4 +- tests/test_audio_signal.py | 21 ++------- 11 files changed, 75 insertions(+), 80 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 93b6a4cad..ed119c8a7 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -22,6 +22,7 @@ API relevant changes: * Reorderd the dimensions of comb_filters to time, freq, tau (#135) * `write_notes` uses `delimiter` instead of `sep` to seperate columns (#155) * `LSTMLayer` takes `Gate`s as arguments, all layers are callable (#161) +* replaced `online` parameter of `FramedSignalProcessor` by `origin` (#169) Other changes: diff --git a/bin/ComplexFlux b/bin/ComplexFlux index 7a0753658..8448d780b 100755 --- a/bin/ComplexFlux +++ b/bin/ComplexFlux @@ -58,7 +58,7 @@ def main(): io_arguments(p, output_suffix='.onsets.txt') ActivationsProcessor.add_arguments(p) SignalProcessor.add_arguments(p, norm=False, gain=0) - FramedSignalProcessor.add_arguments(p, fps=200, online=False) + FramedSignalProcessor.add_arguments(p, fps=200) FilterbankProcessor.add_arguments(p, num_bands=24, fmin=30, fmax=17000, norm_filters=False) LogarithmicSpectrogramProcessor.add_arguments(p, log=True, mul=1, add=1) @@ -69,9 +69,7 @@ def main(): combine=0.03, delay=0) # parse arguments args = p.parse_args() - # switch to offline mode - if args.norm: - args.online = False + # print arguments if args.verbose: print(args) diff --git a/bin/LogFiltSpecFlux b/bin/LogFiltSpecFlux index 01b482616..e5f9e99f7 100755 --- a/bin/LogFiltSpecFlux +++ b/bin/LogFiltSpecFlux @@ -57,7 +57,7 @@ def main(): io_arguments(p, output_suffix='.onsets.txt') ActivationsProcessor.add_arguments(p) SignalProcessor.add_arguments(p, norm=False, gain=0) - FramedSignalProcessor.add_arguments(p, fps=100, online=False) + FramedSignalProcessor.add_arguments(p, fps=100) FilterbankProcessor.add_arguments(p, num_bands=12, fmin=30, fmax=17000, norm_filters=False) LogarithmicSpectrogramProcessor.add_arguments(p, log=True, mul=1, add=1) @@ -68,9 +68,7 @@ def main(): combine=0.03, delay=0) # parse arguments args = p.parse_args() - # switch to offline mode - if args.norm: - args.online = False + # print arguments if args.verbose: print(args) diff --git a/bin/OnsetDetectorLL b/bin/OnsetDetectorLL index 19cbba435..554855302 100755 --- a/bin/OnsetDetectorLL +++ b/bin/OnsetDetectorLL @@ -66,7 +66,7 @@ def main(): io_arguments(p, output_suffix='.onsets.txt') ActivationsProcessor.add_arguments(p) # signal processing arguments - SignalProcessor.add_arguments(p, norm=False, gain=0) + SignalProcessor.add_arguments(p, gain=0) # peak picking arguments PeakPickingProcessor.add_arguments(p, threshold=0.23) @@ -74,7 +74,6 @@ def main(): args = p.parse_args() # set immutable defaults - args.online = True args.fps = 100 args.pre_max = 1. / args.fps args.post_max = 0 @@ -90,7 +89,7 @@ def main(): in_processor = ActivationsProcessor(mode='r', **vars(args)) else: # use a RNN to predict the onsets - in_processor = RNNOnsetProcessor(online=args.online) + in_processor = RNNOnsetProcessor(online=True) # output processor if args.save: diff --git a/bin/SpectralOnsetDetection b/bin/SpectralOnsetDetection index a7bf19e92..063e57fe4 100755 --- a/bin/SpectralOnsetDetection +++ b/bin/SpectralOnsetDetection @@ -74,9 +74,12 @@ def main(): combine=0.03, delay=0) # parse arguments args = p.parse_args() - # switch to offline mode - if args.norm: - args.online = False + + # set online mode parameters + if args.origin == 'online': + args.post_avg = 0 + args.post_max = 0 + # add circular shift for correct phase and remove filterbank if needed if args.onset_method in ('phase_deviation', 'weighted_phase_deviation', 'normalized_weighted_phase_deviation', diff --git a/bin/SuperFlux b/bin/SuperFlux index 24cb1bd32..aa0c22444 100755 --- a/bin/SuperFlux +++ b/bin/SuperFlux @@ -56,7 +56,7 @@ def main(): io_arguments(p, output_suffix='.onsets.txt') ActivationsProcessor.add_arguments(p) SignalProcessor.add_arguments(p, norm=False, gain=0) - FramedSignalProcessor.add_arguments(p, fps=200, online=False) + FramedSignalProcessor.add_arguments(p, fps=200) FilterbankProcessor.add_arguments(p, num_bands=24, fmin=30, fmax=17000, norm_filters=False) LogarithmicSpectrogramProcessor.add_arguments(p, log=True, mul=1, add=1) @@ -68,9 +68,7 @@ def main(): combine=0.03, delay=0) # parse arguments args = p.parse_args() - # switch to offline mode - if args.norm: - args.online = False + # print arguments if args.verbose: print(args) diff --git a/bin/SuperFluxNN b/bin/SuperFluxNN index 8baf87658..099b19a6e 100755 --- a/bin/SuperFluxNN +++ b/bin/SuperFluxNN @@ -79,7 +79,6 @@ def main(): # set immutable defaults args.num_channels = 1 args.fps = 100 - args.online = False args.onset_method = 'superflux' # print arguments diff --git a/madmom/audio/signal.py b/madmom/audio/signal.py index 7a7336ee1..9d8fa59ef 100644 --- a/madmom/audio/signal.py +++ b/madmom/audio/signal.py @@ -517,6 +517,15 @@ def load_audio_file(filename, sample_rate=None, num_channels=None, start=None, # signal classes +SAMPLE_RATE = None +NUM_CHANNELS = None +START = None +STOP = None +NORM = False +GAIN = 0. +DTYPE = None + + class Signal(np.ndarray): """ The :class:`Signal` class represents a signal as a (memory-mapped) numpy @@ -568,13 +577,14 @@ class Signal(np.ndarray): # pylint: disable=super-init-not-called # pylint: disable=attribute-defined-outside-init - def __init__(self, data, sample_rate=None, num_channels=None, start=None, - stop=None, norm=False, gain=0, dtype=None): + def __init__(self, data, sample_rate=SAMPLE_RATE, + num_channels=NUM_CHANNELS, start=START, stop=STOP, norm=NORM, + gain=GAIN, dtype=DTYPE): # this method is for documentation purposes only pass - def __new__(cls, data, sample_rate=None, num_channels=None, start=None, - stop=None, norm=False, gain=0, dtype=None): + def __new__(cls, data, sample_rate=SAMPLE_RATE, num_channels=NUM_CHANNELS, + start=START, stop=STOP, norm=NORM, gain=GAIN, dtype=DTYPE): # try to load an audio file if the data is not a numpy array if not isinstance(data, np.ndarray): data, sample_rate = load_audio_file(data, sample_rate=sample_rate, @@ -654,15 +664,9 @@ class SignalProcessor(Processor): dtypes use the complete value range, float dtypes the range [-1, +1]. """ - SAMPLE_RATE = None - NUM_CHANNELS = None - START = None - STOP = None - NORM = False - GAIN = 0. def __init__(self, sample_rate=SAMPLE_RATE, num_channels=NUM_CHANNELS, - start=None, stop=None, norm=NORM, gain=GAIN, **kwargs): + start=START, stop=STOP, norm=NORM, gain=GAIN, **kwargs): # pylint: disable=unused-argument self.sample_rate = sample_rate self.num_channels = num_channels @@ -855,6 +859,14 @@ def signal_frame(signal, index, frame_size, hop_size, origin=0): return signal[start:stop] +FRAME_SIZE = 2048 +HOP_SIZE = 441. +FPS = None +ORIGIN = 0 +END_OF_SIGNAL = 'normal' +NUM_FRAMES = None + + # classes for splitting a signal into frames class FramedSignal(object): """ @@ -924,8 +936,9 @@ class FramedSignal(object): """ - def __init__(self, signal, frame_size=2048, hop_size=441., fps=None, - origin=0, end='normal', num_frames=None, **kwargs): + def __init__(self, signal, frame_size=FRAME_SIZE, hop_size=HOP_SIZE, + fps=FPS, origin=ORIGIN, end=END_OF_SIGNAL, + num_frames=NUM_FRAMES, **kwargs): # signal handling if not isinstance(signal, Signal): # try to instantiate a Signal @@ -1061,8 +1074,8 @@ class FramedSignalProcessor(Processor): fps : float, optional Use given frames per second; if set, this computes and overwrites the given `hop_size` value. - online : bool, optional - Operate in online mode (see notes below). + origin : int, optional + Location of the window relative to the reference sample of a frame. end : int or str, optional End of signal handling (see :class:`FramedSignal`). num_frames : int, optional @@ -1071,31 +1084,30 @@ class FramedSignalProcessor(Processor): If no :class:`Signal` instance was given, one is instantiated with these additional keyword arguments. - Notes - ----- - The location of the window relative to its reference sample can be set - with the `online` parameter: - - - 'False': the window is centered on its reference sample, - - 'True': the window is located to the left of its reference sample - (including the reference sample), i.e. only past information is used. - + See Also + -------- + :class:`FramedSignal` for a detailed description of the parameters. """ - FRAME_SIZE = 2048 - HOP_SIZE = 441. - FPS = 100. - START = 0 - END_OF_SIGNAL = 'normal' - - def __init__(self, frame_size=FRAME_SIZE, hop_size=HOP_SIZE, fps=None, - online=False, end=END_OF_SIGNAL, **kwargs): + + def __init__(self, frame_size=FRAME_SIZE, hop_size=HOP_SIZE, fps=FPS, + origin=ORIGIN, end=END_OF_SIGNAL, num_frames=NUM_FRAMES, + online=None, **kwargs): # pylint: disable=unused-argument self.frame_size = frame_size self.hop_size = hop_size self.fps = fps # do not convert here, pass it to FramedSignal - self.online = online + self.origin = origin self.end = end + self.num_frames = num_frames + if online is not None: + import warnings + warnings.warn('`online` is deprecated as of version 0.14 and will ' + 'be removed in version 0.15. Use `origin` instead.') + if online: + self.origin = 'online' + else: + self.origin = 'offline' def process(self, data, **kwargs): """ @@ -1115,15 +1127,11 @@ def process(self, data, **kwargs): FramedSignal instance """ - # translate online / offline mode - if self.online: - origin = 'online' - else: - origin = 'offline' # instantiate a FramedSignal from the data and return it return FramedSignal(data, frame_size=self.frame_size, hop_size=self.hop_size, fps=self.fps, - origin=origin, end=self.end, **kwargs) + origin=self.origin, end=self.end, + num_frames=self.num_frames, **kwargs) @staticmethod def add_arguments(parser, frame_size=FRAME_SIZE, fps=FPS, @@ -1172,10 +1180,12 @@ def add_arguments(parser, frame_size=FRAME_SIZE, fps=FPS, g.add_argument('--fps', action='store', type=float, default=fps, help='frames per second [default=%(default).1f]') if online is False: - g.add_argument('--online', dest='online', action='store_true', + g.add_argument('--online', dest='origin', action='store_const', + const='online', default='offline', help='operate in online mode [default=offline]') elif online is True: - g.add_argument('--offline', dest='online', action='store_false', + g.add_argument('--offline', dest='origin', action='store_const', + const='offline', default='online', help='operate in offline mode [default=online]') # return the argument group so it can be modified if needed return g diff --git a/madmom/features/onsets.py b/madmom/features/onsets.py index 9b840a4fb..902210240 100755 --- a/madmom/features/onsets.py +++ b/madmom/features/onsets.py @@ -692,9 +692,11 @@ def __init__(self, online=False, **kwargs): # choose the appropriate models and set frame sizes accordingly if online: + origin = 'online' nn_files = ONSETS_RNN frame_sizes = [512, 1024, 2048] else: + origin = 'offline' nn_files = ONSETS_BRNN frame_sizes = [1024, 2048, 4096] @@ -704,7 +706,7 @@ def __init__(self, online=False, **kwargs): multi = ParallelProcessor([]) for frame_size in frame_sizes: frames = FramedSignalProcessor(frame_size=frame_size, fps=100, - online=online) + origin=origin) filt = FilteredSpectrogramProcessor( num_bands=6, fmin=30, fmax=17000, norm_filters=True) spec = LogarithmicSpectrogramProcessor(mul=5, add=1) diff --git a/madmom/processors.py b/madmom/processors.py index 01038c009..ce5fc5b98 100644 --- a/madmom/processors.py +++ b/madmom/processors.py @@ -114,9 +114,9 @@ def process(self, data): """ raise NotImplementedError('must be implemented by subclass.') - def __call__(self, *args): + def __call__(self, *args, **kwargs): # this magic method makes a Processor callable - return self.process(*args) + return self.process(*args, **kwargs) class OutputProcessor(Processor): diff --git a/tests/test_audio_signal.py b/tests/test_audio_signal.py index 8b1076537..1c11eade4 100644 --- a/tests/test_audio_signal.py +++ b/tests/test_audio_signal.py @@ -1282,8 +1282,9 @@ def test_values(self): self.assertTrue(self.processor.frame_size == 2048) self.assertTrue(self.processor.hop_size == 441.) self.assertTrue(self.processor.fps is None) - self.assertTrue(self.processor.online is False) + self.assertTrue(self.processor.origin == 0) self.assertTrue(self.processor.end == 'normal') + self.assertTrue(self.processor.num_frames is None) def test_process(self): result = self.processor.process(sample_file) @@ -1308,8 +1309,8 @@ def test_rewrite_values(self): def test_process_online(self): # set online - self.processor.online = True - self.assertTrue(self.processor.online) + self.processor.origin = 'online' + self.assertEqual(self.processor.origin, 'online') result = self.processor.process(sample_file) self.assertTrue(np.allclose(result[0][-1], -2494)) self.assertTrue(len(result) == 281) @@ -1359,17 +1360,3 @@ def test_process_end(self): # reset end self.processor.end = 'normal' self.assertTrue(self.processor.end == 'normal') - - def test_constant_types(self): - self.assertIsInstance(FramedSignalProcessor.FRAME_SIZE, int) - self.assertIsInstance(FramedSignalProcessor.HOP_SIZE, float) - self.assertIsInstance(FramedSignalProcessor.FPS, float) - self.assertIsInstance(FramedSignalProcessor.START, int) - self.assertIsInstance(FramedSignalProcessor.END_OF_SIGNAL, str) - - def test_constant_values(self): - self.assertTrue(FramedSignalProcessor.FRAME_SIZE == 2048) - self.assertTrue(FramedSignalProcessor.HOP_SIZE == 441.) - self.assertTrue(FramedSignalProcessor.FPS == 100.) - self.assertTrue(FramedSignalProcessor.START == 0) - self.assertTrue(FramedSignalProcessor.END_OF_SIGNAL == 'normal')