Skip to content

Commit

Permalink
pre-release
Browse files Browse the repository at this point in the history
  • Loading branch information
anxefaraldo committed Dec 11, 2017
1 parent d2e5d91 commit fdde624
Show file tree
Hide file tree
Showing 2 changed files with 266 additions and 67 deletions.
260 changes: 193 additions & 67 deletions miran/key.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,17 @@
import madmom as md

from miran.vector import *
from miran.format import int_to_key
from miran.utils import bin_to_pc
from miran.defs import KEY_SETTINGS, KEY_LABELS


# TODO: we should try to separate tonic from mode id...
# TODO: if the confidence is relatively high but there are many candidates, the track is possibly atonal
# TODO: if the confidence is low and there are few candidates, the track is possibly unpitched



def _select_profile_type(profile, templates_dict):
try:
return templates_dict[profile]
Expand Down Expand Up @@ -50,7 +57,9 @@ def _dur_to_endtime(**kwargs):
return None


def _key2(pcp, profile_type='bgate', interpolation='linear', conf_thres=0.5):
def _key2(pcp, profile_type='bgate', interpolation='linear', candidates=4, conf_thres=0.5):
# NOTE: for some reason, increasing the number of candidates breaks the process

key_templates = {

'bgate': np.array([[1., 0.00, 0.42, 0.00, 0.53, 0.37, 0.00, 0.77, 0.00, 0.38, 0.21, 0.30],
Expand Down Expand Up @@ -125,68 +134,180 @@ def _key2(pcp, profile_type='bgate', interpolation='linear', conf_thres=0.5):
if (pcp.size < 12) or (pcp.size % 12 != 0):
raise IndexError("Input PCP size is not a positive multiple of 12")

_major, _minor = _select_profile_type(profile_type, key_templates)

if _major.size > pcp.size:
pcp = resize_vector(pcp, _major.size)

if _major.size < pcp.size:
_major = resize_vector(_major, pcp.size, interpolation)
_minor = resize_vector(_minor, pcp.size, interpolation)

first_max_major = -1
second_max_major = -1
key_index_major = -1

first_max_minor = -1
second_max_minor = -1
key_index_minor = -1

for shift in np.arange(pcp.size):
correlation_major = crosscorrelation(pcp, np.roll(_major, shift))
if correlation_major > first_max_major:
second_max_major = first_max_major
first_max_major = correlation_major
key_index_major = shift

correlation_minor = crosscorrelation(pcp, np.roll(_minor, shift))
if correlation_minor > first_max_minor:
second_max_minor = first_max_minor
first_max_minor = correlation_minor
key_index_minor = shift

if first_max_major > first_max_minor:
key_index = key_index_major
scale = 'major'
first_max = first_max_major
second_max = second_max_major
elif first_max_minor > first_max_major:
key_index = key_index_minor
scale = 'minor'
first_max = first_max_minor
second_max = second_max_minor
else:
key_index = -1
first_max = -1
second_max = -1
scale = 'unknown'

key_index /= pcp.size / 12.
key_index = int(np.round(key_index)) % 12

if key_index < 0:
raise IndexError("key_index smaller than zero. Could not find key.")
else:
first_to_second_ratio = (first_max - second_max) / first_max
if first_max < conf_thres:
#return 'NoTonic', 'NoMode', first_max, first_to_second_ratio
return 'NoTonic', 'NoMode', first_max, first_to_second_ratio
# TODO: we should try to separate tonic from mode id...
# TODO: if the confidence is relatively high but there are many candidates, the track is possibly atonal
# TODO: if the confidence is low and there are few candidates, the track is possibly unpitched
else:
return KEY_LABELS[key_index], scale, first_max, first_to_second_ratio

_key_profiles = _select_profile_type(profile_type, key_templates)

# TODO: IMPORTANT FIX THIS! The PCP/profile resizing below is disabled and still refers to the old _major/_minor names.
# if _key_profiles[0].size > pcp.size:
# pcp = resize_vector(pcp, _key_profiles[0].size)

# if _key_profiles[0].size < pcp.size:
# _major = resize_vector(_major, pcp.size, interpolation)
# _minor = resize_vector(_minor, pcp.size, interpolation)

corr_values = []

for profile in _key_profiles:
for shift in np.arange(pcp.size):
corr_values.append(crosscorrelation(pcp, np.roll(profile, shift)))

corr_indexes = np.argpartition(corr_values, -candidates)[-candidates:]

keys = []
keys_confidences = []

for index in corr_indexes[::-1]:
keys.append(int_to_key(index))
keys_confidences.append(corr_values[index])

first_to_second_ratio = (corr_values[0] - corr_values[1]) # / corr_values[0]

if keys_confidences[0] < conf_thres:
return 'X', keys_confidences[0], first_to_second_ratio

if np.mean(keys_confidences - corr_values[0]) < 0.1:
print("Too many key Candidates, random result")

return keys[0], keys, keys_confidences, first_to_second_ratio



# def _key2(pcp, profile_type='bgate', interpolation='linear', conf_thres=0.5):
# key_templates = {
#
# 'bgate': np.array([[1., 0.00, 0.42, 0.00, 0.53, 0.37, 0.00, 0.77, 0.00, 0.38, 0.21, 0.30],
# [1., 0.00, 0.36, 0.39, 0.00, 0.38, 0.00, 0.74, 0.27, 0.00, 0.42, 0.23]]),
#
# # almost identical to bgate. kept for backwards compatibility
# 'bmtg3': np.array([[1.00, 0.00, 0.42, 0.00, 0.53, 0.37, 0.00, 0.76, 0.00, 0.38, 0.21, 0.30],
# [1.00, 0.00, 0.36, 0.39, 0.10, 0.37, 0.00, 0.76, 0.27, 0.00, 0.42, 0.23]]),
#
# 'bmtg2': np.array([[1.00, 0.10, 0.42, 0.10, 0.53, 0.37, 0.10, 0.77, 0.10, 0.38, 0.21, 0.30],
# [1.00, 0.10, 0.36, 0.39, 0.29, 0.38, 0.10, 0.74, 0.27, 0.10, 0.42, 0.23]]),
#
# # was originally bmtg1
# 'braw': np.array([[1., 0.1573, 0.4200, 0.1570, 0.5296, 0.3669, 0.1632, 0.7711, 0.1676, 0.3827, 0.2113, 0.2965],
# [1., 0.2330, 0.3615, 0.3905, 0.2925, 0.3777, 0.1961, 0.7425, 0.2701, 0.2161, 0.4228, 0.2272]]),
#
# 'diatonic': np.array([[1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1],
# [1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1]]),
#
# 'monotonic': np.array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
# [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]]),
#
# 'triads': np.array([[1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0],
# [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0]]),
#
# 'edma_ecir': np.array([[0.16519551, 0.04749026, 0.08293076, 0.06687112, 0.09994645, 0.09274123, 0.05294487, 0.13159476, 0.05218986,
# 0.07443653, 0.06940723, 0.0642515],
# [0.17235348, 0.05336489, 0.0761009, 0.10043649, 0.05621498, 0.08527853, 0.0497915, 0.13451001, 0.07458916, 0.05003023,
# 0.09187879, 0.05545106]]),
#
# 'edmm_ecir': np.array([[0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083],
# [0.17235348, 0.04, 0.0761009, 0.12, 0.05621498, 0.08527853, 0.0497915, 0.13451001, 0.07458916, 0.05003023, 0.09187879,
# 0.05545106]]),
#
# 'edma': np.array([[1., 0.2875, 0.5020, 0.4048, 0.6050, 0.5614, 0.3205, 0.7966, 0.3159, 0.4506, 0.4202, 0.3889],
# [1., 0.3096, 0.4415, 0.5827, 0.3262, 0.4948, 0.2889, 0.7804, 0.4328, 0.2903, 0.5331, 0.3217]]),
#
# 'edmm': np.array([[1., 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
# [1., 0.2321, 0.4415, 0.6962, 0.3262, 0.4948, 0.2889, 0.7804, 0.4328, 0.2903, 0.5331, 0.3217]]),
#
# 'krumhansl': np.array([[6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
# [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]]),
#
# 'temperley99': np.array([[5.0, 2.0, 3.5, 2.0, 4.5, 4.0, 2.0, 4.5, 2.0, 3.5, 1.5, 4.0],
# [5.0, 2.0, 3.5, 4.5, 2.0, 4.0, 2.0, 4.5, 3.5, 2.0, 1.5, 4.0]]),
#
# 'temperley05': np.array([[0.748, 0.060, 0.488, 0.082, 0.67, 0.46, 0.096, 0.715, 0.104, 0.366, 0.057, 0.4],
# [0.712, 0.084, 0.474, 0.618, 0.049, 0.46, 0.105, 0.747, 0.404, 0.067, 0.133, 0.33]]),
#
# 'temperley-essen': np.array([[0.184, 0.001, 0.155, 0.003, 0.191, 0.109, 0.005, 0.214, 0.001, 0.078, 0.004, 0.055],
# [0.192, 0.005, 0.149, 0.179, 0.002, 0.144, 0.002, 0.201, 0.038, 0.012, 0.053, 0.022]]),
#
# 'thpcp': np.array([[0.95162, 0.20742, 0.71758, 0.22007, 0.71341, 0.48841, 0.31431, 1.00000, 0.20957, 0.53657, 0.22585, 0.55363],
# [0.94409, 0.21742, 0.64525, 0.63229, 0.27897, 0.57709, 0.26428, 1.0000, 0.26428, 0.30633, 0.45924, 0.35929]]),
#
# 'shaath': np.array([[6.6, 2.0, 3.5, 2.3, 4.6, 4.0, 2.5, 5.2, 2.4, 3.7, 2.3, 3.4],
# [6.5, 2.7, 3.5, 5.4, 2.6, 3.5, 2.5, 5.2, 4.0, 2.7, 4.3, 3.2]]),
#
# 'gomez': np.array([[0.82, 0.00, 0.55, 0.00, 0.53, 0.30, 0.08, 1.00, 0.00, 0.38, 0.00, 0.47],
# [0.81, 0.00, 0.53, 0.54, 0.00, 0.27, 0.07, 1.00, 0.27, 0.07, 0.10, 0.36]]),
#
# 'faraldo': np.array([[7.0, 2.0, 3.8, 2.3, 4.7, 4.1, 2.5, 5.2, 2.0, 3.7, 3.0, 3.4],
# [7.0, 3.0, 3.8, 4.5, 2.6, 3.5, 2.5, 5.2, 4.0, 2.5, 4.5, 3.0]]),
#
# 'pentatonic': np.array([[1.0, 0.1, 0.25, 0.1, 0.5, 0.7, 0.1, 0.8, 0.1, 0.25, 0.1, 0.5],
# [1.0, 0.2, 0.25, 0.5, 0.1, 0.7, 0.1, 0.8, 0.3, 0.2, 0.6, 0.2]]),
#
# 'noland': np.array([[0.0629, 0.0146, 0.061, 0.0121, 0.0623, 0.0414, 0.0248, 0.0631, 0.015, 0.0521, 0.0142, 0.0478],
# [0.0682, 0.0138, 0.0543, 0.0519, 0.0234, 0.0544, 0.0176, 0.067, 0.0349, 0.0297, 0.0401, 0.027]])
# }
#
# if (pcp.size < 12) or (pcp.size % 12 != 0):
# raise IndexError("Input PCP size is not a positive multiple of 12")
#
# _major, _minor = _select_profile_type(profile_type, key_templates)
#
# if _major.size > pcp.size:
# pcp = resize_vector(pcp, _major.size)
#
# if _major.size < pcp.size:
# _major = resize_vector(_major, pcp.size, interpolation)
# _minor = resize_vector(_minor, pcp.size, interpolation)
#
# first_max_major = -1
# second_max_major = -1
# key_index_major = -1
#
# first_max_minor = -1
# second_max_minor = -1
# key_index_minor = -1
#
# for shift in np.arange(pcp.size):
# correlation_major = crosscorrelation(pcp, np.roll(_major, shift))
# if correlation_major > first_max_major:
# second_max_major = first_max_major
# first_max_major = correlation_major
# key_index_major = shift
#
# correlation_minor = crosscorrelation(pcp, np.roll(_minor, shift))
# if correlation_minor > first_max_minor:
# second_max_minor = first_max_minor
# first_max_minor = correlation_minor
# key_index_minor = shift
#
# if first_max_major > first_max_minor:
# key_index = key_index_major
# scale = 'major'
# first_max = first_max_major
# second_max = second_max_major
# elif first_max_minor > first_max_major:
# key_index = key_index_minor
# scale = 'minor'
# first_max = first_max_minor
# second_max = second_max_minor
# else:
# key_index = -1
# first_max = -1
# second_max = -1
# scale = 'unknown'
#
# key_index /= pcp.size / 12.
# key_index = int(np.round(key_index)) % 12
#
# if key_index < 0:
# raise IndexError("key_index smaller than zero. Could not find key.")
# else:
# first_to_second_ratio = (first_max - second_max) / first_max
# key = '{} {}'.format(KEY_LABELS[key_index], scale)
#
# if first_max < conf_thres:
# #return 'NoTonic', 'NoMode', first_max, first_to_second_ratio
# return 'X', first_max, first_to_second_ratio
# else:
# # return KEY_LABELS[key_index], scale, first_max, first_to_second_ratio
# return key, first_max, first_to_second_ratio
#

def _key3(pcp, profile_type='bgate', interpolation='linear', conf_thres=0.5):
if (pcp.size < 12) or (pcp.size % 12 != 0):
Expand Down Expand Up @@ -1015,13 +1136,18 @@ def key_angel(input_audio_file, output_text_file, **kwargs):
# IMPORTANT! Adjust to essentia's HPCP calculation starting on A...
chroma = np.roll(chroma, -3 * (kwargs["HPCP_SIZE"] // 12))

print(list(chroma))


if kwargs["USE_THREE_PROFILES"]:
estimation_1 = _key3(chroma, kwargs["KEY_PROFILE"], kwargs["PROFILE_INTERPOLATION"])
else:
estimation_1 = _key2(chroma, kwargs["KEY_PROFILE"], kwargs["PROFILE_INTERPOLATION"])

key_1 = estimation_1[0] + '\t' + estimation_1[1]
correlation_value = estimation_1[2]
#key_1 = estimation_1[0] + '\t' + estimation_1[1]
#correlation_value = estimation_1[2]
key_1 = estimation_1[0]
correlation_value = estimation_1[1]

if kwargs["WITH_MODAL_DETAILS"]:
estimation_2 = _key7(chroma, kwargs["PROFILE_INTERPOLATION"])
Expand All @@ -1038,8 +1164,8 @@ def key_angel(input_audio_file, output_text_file, **kwargs):
key = key_1

textfile = open(output_text_file, 'w')
#textfile.write(key + '\t' + str(correlation_value) + '\n')
textfile.write(key)
textfile.write(key + '\t' + str(correlation_value) + '\n')
#textfile.write(key)
textfile.close()

return key, correlation_value
73 changes: 73 additions & 0 deletions miran/settings/edmNew.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
{
"DURATION": null,
"START_TIME": 0,

"SAMPLE_RATE": 44100,
"WINDOW_SIZE": 32768,
"HOP_SIZE": 4096,
"WINDOW_SHAPE": "hann",

"PCP_THRESHOLD": 0.2,
"HIGHPASS_CUTOFF": 200,
"SPECTRAL_WHITENING": true,
"DETUNING_CORRECTION": false,
"DETUNING_CORRECTION_SCOPE": "average",

"MIN_HZ": 25.0,
"MAX_HZ": 3500.0,
"SPECTRAL_PEAKS_THRESHOLD": 0.0001,
"SPECTRAL_PEAKS_MAX": 60,

"HPCP_BAND_PRESET": false,
"HPCP_SPLIT_HZ": 250,
"HPCP_HARMONICS": 4,
"HPCP_REFERENCE_HZ": 440,
"HPCP_NON_LINEAR": false,
"HPCP_NORMALIZE": false,
"HPCP_SHIFT": false,
"HPCP_SIZE": 12,
"HPCP_WEIGHT_WINDOW_SEMITONES": 1,
"HPCP_WEIGHT_TYPE": "cosine",
"PROFILE_INTERPOLATION": "linear",


"KEY_POLYPHONY": false,
"KEY_USE_THREE_CHORDS": false,
"KEY_HARMONICS": 15,
"KEY_SLOPE": 0.2,

"ANALYSIS_TYPE": "global",
"N_WINDOWS": 100,
"WINDOW_INCREMENT": 100,

"KEY_PROFILE": "bgate",
"USE_THREE_PROFILES": false,
"WITH_MODAL_DETAILS": false,

"OPTIONS": {
"DETUNING_CORRECTION_SCOPE": [
"average",
"frame"
],
"HPCP_NORMALIZE": [
"none",
"unitSum",
"unitMax"
],
"HPCP_WEIGHT_TYPE": [
"none",
"cosine",
"squaredCosine"
],
"KEY_PROFILE": [
"bgate",
"braw",
"edma",
"edmm"
],
"ANALYSIS_TYPE": [
"global",
"local"
]
}
}

0 comments on commit fdde624

Please sign in to comment.