Skip to content

Commit

Permalink
Merge pull request #381 from CPJKU/key2018
Browse files Browse the repository at this point in the history
Integrated new Key Classification Model (ISMIR 2018)
  • Loading branch information
fdlm authored Nov 5, 2018
2 parents cf34790 + f9cb6e8 commit 870189b
Show file tree
Hide file tree
Showing 10 changed files with 110 additions and 30 deletions.
2 changes: 1 addition & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ New features:
* Bar tracking functionality (#316)
* Added `quantize_notes` function (#327)
* Added global key evaluation (#336)
* Added key recognition feature and program (#345)
* Added key recognition feature and program (#345, #381)

Bug fixes:

Expand Down
7 changes: 3 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -355,10 +355,9 @@ References
Proceedings of IEEE International Workshop on Machine Learning for Signal
Processing (MLSP), 2016.
.. [18] Filip Korzeniowski and Gerhard Widmer,
*End-to-End Musical Key Estimation Using a Convolutional Neural Network*,
Proceedings of the 25th European Signal Processing Conference (EUSIPCO),
2017.
*Genre-Agnostic Key Classification with Convolutional Neural Networks*,
Proceedings of the 19th International Society for Music Information
Retrieval Conference (ISMIR), 2018.
Acknowledgements
================
Expand Down
14 changes: 5 additions & 9 deletions bin/KeyRecognition
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,9 @@ def main():
using a Convolutional Neural Network, as described in
Filip Korzeniowski and Gerhard Widmer,
"End-to-End Musical Key Estimation Using a Convolutional Neural Network",
In Proceedings of the 25th European Signal Processing Conference (EUSIPCO),
Kos, Greece, 2017.
The model used here differs slightly from the one in the paper: it was
trained on snippets of audio instead of full songs, and using a dataset
that includes (mostly piano) classical music.
"Genre-Agnostic Key Classification with Convolutional Neural Networks",
In Proceedings of the 19th International Society for Music Information
Retrieval Conference (ISMIR), Paris, France, 2018.
This program can be run in 'single' file mode to process a single audio
file and write the recognised key to STDOUT or the given output file.
Expand All @@ -42,7 +38,7 @@ def main():
$ KeyRecognition batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES
If no output directory is given, the program writes the files with the
extracted chords to the same location as the audio files.
extracted key to the same location as the audio files.
The 'pickle' mode can be used to store the used parameters to be able to
exactly reproduce experiments.
Expand All @@ -51,7 +47,7 @@ def main():
)
# version
p.add_argument('--version', action='version',
version='KeyRecognition.2017')
version='KeyRecognition.2018')
io_arguments(p, output_suffix='.key.txt')
ActivationsProcessor.add_arguments(p)

Expand Down
26 changes: 13 additions & 13 deletions madmom/features/key.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ def key_prediction_to_label(prediction):
return KEY_LABELS[prediction[0].argmax()]


def add_axis(x):
    """Return `x` with a new leading (batch) axis of length 1."""
    return np.expand_dims(x, axis=0)


class CNNKeyRecognitionProcessor(SequentialProcessor):
"""
Recognise the global key of a musical piece using a Convolutional Neural
Expand All @@ -48,16 +52,13 @@ class CNNKeyRecognitionProcessor(SequentialProcessor):
nn_files : list, optional
List with trained CNN model files. Per default ('None'), an ensemble
of networks will be used.
single_net : bool, optional
Use only a single CNN for prediction. This speeds up processing, but
slightly worsens the results.
References
----------
.. [1] Filip Korzeniowski and Gerhard Widmer,
"End-to-End Musical Key Estimation Using a Convolutional Neural
Network", In Proceedings of the 25th European Signal Processing
Conference (EUSIPCO), Kos, Greece, 2017.
"Genre-Agnostic Key Classification with Convolutional Neural
Networks", In Proceedings of the 19th International Society for
Music Information Retrieval Conference (ISMIR), Paris, France, 2018.
Examples
--------
Expand All @@ -68,19 +69,18 @@ class CNNKeyRecognitionProcessor(SequentialProcessor):
>>> proc # doctest: +ELLIPSIS
<madmom.features.key.CNNKeyRecognitionProcessor object at 0x...>
>>> proc('tests/data/audio/sample.wav') # doctest: +NORMALIZE_WHITESPACE
array([[0. , 0. , 0.00001, 0.00012, 0. , 0. ,
0.00151, 0. , 0. , 0. , 0.00003, 0.81958,
0. , 0. , 0. , 0.01747, 0. , 0. ,
0.00001, 0. , 0.00006, 0. , 0.00001, 0.16119]],
dtype=float32)
array([[0.03426, 0.0331 , 0.02979, 0.04423, 0.04215, 0.0311 , 0.05225,
0.04263, 0.04141, 0.02907, 0.03755, 0.09546, 0.0431 , 0.02792,
0.02138, 0.05589, 0.03276, 0.02786, 0.02415, 0.04608, 0.05329,
0.02804, 0.03868, 0.08786]])
"""

def __init__(self, nn_files=None, **kwargs):
from ..audio.signal import SignalProcessor, FramedSignalProcessor
from ..audio.stft import ShortTimeFourierTransformProcessor
from ..audio.spectrogram import LogarithmicFilteredSpectrogramProcessor
from ..ml.nn import NeuralNetworkEnsemble
from ..ml.nn.activations import softmax
from ..models import KEY_CNN

# spectrogram computation
Expand All @@ -97,5 +97,5 @@ def __init__(self, nn_files=None, **kwargs):

# create processing pipeline
super(CNNKeyRecognitionProcessor, self).__init__([
sig, frames, stft, spec, nn
sig, frames, stft, spec, nn, add_axis, softmax
])
7 changes: 6 additions & 1 deletion madmom/ml/nn/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,7 @@ def _kernel_margins(kernel_shape, margin_shift):
start_x, end_x, start_y, end_y : tuple
Indices determining the valid part of the convolution output.
"""

start_x = int(np.floor(kernel_shape[0] / 2.))
start_y = int(np.floor(kernel_shape[1] / 2.))

Expand All @@ -671,15 +672,19 @@ def _kernel_margins(kernel_shape, margin_shift):
end_x -= margin_shift
else:
end_x = start_x
start_x = start_x if start_x > 0 else None
end_x = -end_x if end_x > 0 else None

if kernel_shape[1] % 2 == 0:
end_y = start_y - 1
start_y += margin_shift
end_y -= margin_shift
else:
end_y = start_y
start_y = start_y if start_y > 0 else None
end_y = -end_y if end_y > 0 else None

return start_x, -end_x, start_y, -end_y
return start_x, end_x, start_y, end_y


try:
Expand Down
2 changes: 1 addition & 1 deletion madmom/models
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
'models/chords/*/*',
'models/chroma/*/*',
'models/downbeats/*/*',
'models/key/*/*',
'models/key/2018/*',
'models/notes/*/*',
'models/onsets/*/*',
'models/patterns/*/*',
Expand Down
Binary file modified tests/data/activations/sample.key_cnn.npz
Binary file not shown.
Binary file modified tests/data/activations/sample2.key_cnn.npz
Binary file not shown.
80 changes: 80 additions & 0 deletions tests/test_ml_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,3 +506,83 @@ def test_constant_padding(self):
self.assertTrue(np.allclose(out[:, :, 3:-3], data))
self.assertTrue(np.allclose(out[:, :, :3], 2.2))
self.assertTrue(np.allclose(out[:, :, -3:], 2.2))


class ConvolutionalLayerClassTest(unittest.TestCase):
    """Test ConvolutionalLayer.activate() with 1x1 and 3x3 kernels."""

    # 1x1 convolution weights, shape (2, 3, 1, 1).
    # Presumably (in_channels, out_channels, kernel_h, kernel_w), mapping
    # the 2-channel input to 3 output maps — TODO confirm against
    # ConvolutionalLayer's weight layout.
    W1 = np.array([[[[0.69557322]],
                    [[0.45649655]],
                    [[0.58179561]]],
                   [[[0.20438251]],
                    [[0.17404747]],
                    [[0.41624290]]]])

    # 3x3 convolution weights, shape (2, 3, 3, 3); same assumed layout as W1
    # but with a 3x3 spatial kernel.
    W3 = np.array([[[[0.57353216, 0.72422232, 0.15716315],
                     [0.82000373, 0.26902348, 0.69203708],
                     [0.45564084, 0.89265194, 0.98080186]],
                    [[0.44920649, 0.52442715, 0.33103038],
                     [0.24536095, 0.49307102, 0.28850389],
                     [0.38324254, 0.46965330, 0.76865911]],
                    [[0.44225901, 0.34989312, 0.92381997],
                     [0.32123710, 0.04856574, 0.87387125],
                     [0.70175767, 0.38149251, 0.40178089]]],
                   [[[0.28197446, 0.35315104, 0.53862099],
                     [0.01224023, 0.94672135, 0.87194315],
                     [0.69193064, 0.27611521, 0.51076897]],
                    [[0.22228372, 0.58605351, 0.17730248],
                     [0.10949298, 0.43124835, 0.71336330],
                     [0.57694486, 0.44623928, 0.11774881]],
                    [[0.76850363, 0.46740177, 0.76900027],
                     [0.61551742, 0.62841514, 0.05235070],
                     [0.01321052, 0.93591818, 0.61256317]]]])

    # Bias, one value per output feature map (3 maps).
    B = np.array([0.27614033, 0.87995416, 0.23540803])

    # Expected activation of the 1x1 layer on `self.data`, shape (5, 5, 3).
    # Positions where both input channels are zero contain exactly the bias
    # values B; the non-bias rows coincide with the diagonals of the two
    # stacked identity matrices.
    O1 = np.array([[[0.97171354, 1.33645070, 0.81720366],
                    [0.27614033, 0.87995416, 0.23540803],
                    [0.27614033, 0.87995416, 0.23540803],
                    [0.27614033, 0.87995416, 0.23540803],
                    [0.48052284, 1.05400163, 0.65165093]],
                   [[0.27614033, 0.87995416, 0.23540803],
                    [0.97171354, 1.33645070, 0.81720366],
                    [0.27614033, 0.87995416, 0.23540803],
                    [0.48052284, 1.05400163, 0.65165093],
                    [0.27614033, 0.87995416, 0.23540803]],
                   [[0.27614033, 0.87995416, 0.23540803],
                    [0.27614033, 0.87995416, 0.23540803],
                    [1.17609602, 1.51049817, 1.23344656],
                    [0.27614033, 0.87995416, 0.23540803],
                    [0.27614033, 0.87995416, 0.23540803]],
                   [[0.27614033, 0.87995416, 0.23540803],
                    [0.48052284, 1.05400163, 0.65165093],
                    [0.27614033, 0.87995416, 0.23540803],
                    [0.97171354, 1.33645070, 0.81720366],
                    [0.27614033, 0.87995416, 0.23540803]],
                   [[0.48052284, 1.05400163, 0.65165093],
                    [0.27614033, 0.87995416, 0.23540803],
                    [0.27614033, 0.87995416, 0.23540803],
                    [0.27614033, 0.87995416, 0.23540803],
                    [0.97171354, 1.33645070, 0.81720366]]])

    # Expected activation of the 3x3 layer on `self.data`, shape (3, 3, 3).
    # The smaller spatial size suggests a 'valid'-style convolution of the
    # 5x5 input with a 3x3 kernel — TODO confirm padding mode.
    O3 = np.array([[[2.38147223, 2.81317455, 1.89651736],
                    [2.05779099, 2.38843173, 2.54209157],
                    [2.61057651, 2.39648026, 2.56985398]],
                   [[2.35418737, 2.29051489, 2.02105685],
                    [4.27677071, 3.77638656, 2.53863951],
                    [2.84045803, 2.85248774, 2.44744130]],
                   [[2.90905416, 2.44869238, 2.34779163],
                    [3.13685429, 2.75457102, 1.92640647],
                    [2.61026680, 2.70863968, 1.74057683]]])

    def setUp(self):
        # Layers under test: one with a 1x1 kernel, one with a 3x3 kernel,
        # both sharing the same bias vector.
        self.layer1x1 = ConvolutionalLayer(ConvolutionalLayerClassTest.W1,
                                           ConvolutionalLayerClassTest.B)
        self.layer3x3 = ConvolutionalLayer(ConvolutionalLayerClassTest.W3,
                                           ConvolutionalLayerClassTest.B)
        # Input of shape (5, 5, 2): a 5x5 identity matrix and its
        # left-right mirror stacked as two channels on the last axis.
        self.data = np.stack([np.eye(5), np.eye(5)[:, ::-1]], axis=-1)

    def test_activate(self):
        # Compare layer outputs against the precomputed expectations.
        out1 = self.layer1x1.activate(self.data)
        self.assertTrue(np.allclose(out1, ConvolutionalLayerClassTest.O1))
        out3 = self.layer3x3.activate(self.data)
        self.assertTrue(np.allclose(out3, ConvolutionalLayerClassTest.O3))

0 comments on commit 870189b

Please sign in to comment.