adapting key recognition classes to new model.

CPJKU · Oct 30, 2018 · 71f510c · 71f510c
1 parent 7b72f5a
commit 71f510c
Show file tree

Hide file tree

Showing 7 changed files with 23 additions and 29 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -11,7 +11,7 @@ New features:
 * Bar tracking functionality (#316)
 * Added `quantize_notes` function (#327)
 * Added global key evaluation (#336)
-* Added key recognition feature and program (#345)
+* Added key recognition feature and program (#345, #381)
 
 Bug fixes:
 

diff --git a/README.rst b/README.rst
@@ -354,10 +354,9 @@ References
     Proceedings of IEEE International Workshop on Machine Learning for Signal
     Processing (MLSP), 2016.
 .. [18] Filip Korzeniowski and Gerhard Widmer,
-    *End-to-End Musical Key Estimation Using a Convolutional Neural Network*,
-    Proceedings of the 25th European Signal Processing Conference (EUSIPCO),
-    2017.
-
+    *Genre-Agnostic Key Classification with Convolutional Neural Networks*,
+    Proceedings of the 19th International Society for Music Information
+    Retrieval Conference (ISMIR), 2018.
 
 Acknowledgements
 ================

diff --git a/bin/KeyRecognition b/bin/KeyRecognition
@@ -23,13 +23,9 @@ def main():
     using a Convolutional Neural Network, as described in
 
     Filip Korzeniowski and Gerhard Widmer,
-    "End-to-End Musical Key Estimation Using a Convolutional Neural Network",
-    In Proceedings of the 25th European Signal Processing Conference (EUSIPCO),
-    Kos, Greece, 2017.
-
-    The model used here differs slightly from the one in the paper: it was
-    trained on snippets of audio instead of full songs, and using a dataset
-    that includes (mostly piano) classical music.
+    "Genre-Agnostic Key Classification with Convolutional Neural Networks",
+    In Proceedings of the 19th International Society for Music Information
+    Retrieval Conference (ISMIR), Paris, France, 2018.
 
     This program can be run in 'single' file mode to process a single audio
     file and write the recognised key to STDOUT or the given output file.
@@ -42,7 +38,7 @@ def main():
       $ KeyRecognition batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES
 
     If no output directory is given, the program writes the files with the
-    extracted chords to the same location as the audio files.
+    extracted key to the same location as the audio files.
 
     The 'pickle' mode can be used to store the used parameters to be able to
     exactly reproduce experiments.
@@ -51,7 +47,7 @@ def main():
     )
     # version
     p.add_argument('--version', action='version',
-                   version='KeyRecognition.2017')
+                   version='KeyRecognition.2018')
     io_arguments(p, output_suffix='.key.txt')
     ActivationsProcessor.add_arguments(p)
 

diff --git a/madmom/features/key.py b/madmom/features/key.py
@@ -38,6 +38,10 @@ def key_prediction_to_label(prediction):
     return KEY_LABELS[prediction[0].argmax()]
 
 
+def add_axis(x):
+    return x[np.newaxis, ...]
+
+
 class CNNKeyRecognitionProcessor(SequentialProcessor):
     """
     Recognise the global key of a musical piece using a Convolutional Neural
@@ -48,16 +52,12 @@ class CNNKeyRecognitionProcessor(SequentialProcessor):
     nn_files : list, optional
         List with trained CNN model files. Per default ('None'), an ensemble
         of networks will be used.
-    single_net : bool, optional
-        Use only a single CNN for prediction. This speeds up processing, but
-        slightly worsens the results.
 
     References
     ----------
-    .. [1] Filip Korzeniowski and Gerhard Widmer,
-           "End-to-End Musical Key Estimation Using a Convolutional Neural
-           Network", In Proceedings of the 25th European Signal Processing
-           Conference (EUSIPCO), Kos, Greece, 2017.
+    .. [1] Filip Korzeniowski and Gerhard Widmer,
+           "Genre-Agnostic Key Classification with Convolutional Neural
+           Networks", In Proceedings of the 19th International Society for
+           Music Information Retrieval Conference (ISMIR), Paris, France, 2018.
 
     Examples
     --------
@@ -68,19 +68,18 @@ class CNNKeyRecognitionProcessor(SequentialProcessor):
     >>> proc  # doctest: +ELLIPSIS
     <madmom.features.key.CNNKeyRecognitionProcessor object at 0x...>
     >>> proc('tests/data/audio/sample.wav')  # doctest: +NORMALIZE_WHITESPACE
-    array([[0.     , 0.     , 0.00001, 0.00012, 0.     , 0.     ,
-            0.00151, 0.     , 0.     , 0.     , 0.00003, 0.81958,
-            0.     , 0.     , 0.     , 0.01747, 0.     , 0.     ,
-            0.00001, 0.     , 0.00006, 0.     , 0.00001, 0.16119]],
-          dtype=float32)
-
+    array([[0.03426, 0.0331 , 0.02979, 0.04423, 0.04215, 0.0311 , 0.05225,
+            0.04263, 0.04141, 0.02907, 0.03755, 0.09546, 0.0431 , 0.02792,
+            0.02138, 0.05589, 0.03276, 0.02786, 0.02415, 0.04608, 0.05329,
+            0.02804, 0.03868, 0.08786]])
     """
 
     def __init__(self, nn_files=None, **kwargs):
         from ..audio.signal import SignalProcessor, FramedSignalProcessor
         from ..audio.stft import ShortTimeFourierTransformProcessor
         from ..audio.spectrogram import LogarithmicFilteredSpectrogramProcessor
         from ..ml.nn import NeuralNetworkEnsemble
+        from ..ml.nn.activations import softmax
         from ..models import KEY_CNN
 
         # spectrogram computation
@@ -97,5 +96,5 @@ def __init__(self, nn_files=None, **kwargs):
 
         # create processing pipeline
         super(CNNKeyRecognitionProcessor, self).__init__([
-            sig, frames, stft, spec, nn
+            sig, frames, stft, spec, nn, add_axis, softmax
         ])
diff --git a/madmom/models b/madmom/models
diff --git a/tests/data/activations/sample.key_cnn.npz b/tests/data/activations/sample.key_cnn.npz
diff --git a/tests/data/activations/sample2.key_cnn.npz b/tests/data/activations/sample2.key_cnn.npz