-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmidi_process.py
executable file
·177 lines (142 loc) · 5.31 KB
/
midi_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import os,re
import numpy as np
import vamp
import re
import matplotlib.pyplot as plt
from scipy.stats import norm
import config
def coarse_code(x, n_states = 3, sigma = 0.4):
    """Project normalized values onto a bank of Gaussian state responses.

    Each of the ``n_states`` states has a Gaussian response curve whose
    centre is one of ``n_states`` evenly spaced points covering [0.0, 1.0]
    (both ends included). The output holds, for every input value, the
    value of each state's normal probability density at that input.

    Parameters
    ----------
    x : ndarray
        Vector of normalized values in [0.0, 1.0], shape (nframes,).
    n_states : int
        Number of states to use for coarse coding.
    sigma : float
        Standard deviation (scale) shared by all Gaussian responses.
        Default: 0.4

    Returns
    -------
    ndarray
        Matrix of shape (nframes, n_states); column ``k`` is the response
        of the state centred at the ``k``-th point.

    See also
    --------
    https://en.wikipedia.org/wiki/Neural_coding#Position_coding
    https://plus.google.com/+IlyaEdrenkin/posts/B55jf3wUBvD
    https://github.com/CSTR-Edinburgh/merlin/blob/master/src/frontend/label_normalisation.py
    """
    within_bounds = np.all(x >= 0.0) and np.all(x <= 1.0)
    assert within_bounds, 'expected input to be normalized in range [0;1]'

    centres = np.linspace(0.0, 1.0, num=n_states, endpoint=True)

    # One flattened response vector per state; stacked side by side they
    # form the (nframes, n_states) result.
    responses = []
    for centre in centres:
        responses.append(np.ravel(norm.pdf(x, centre, sigma)))
    return np.column_stack(responses)
def note_str_to_num(note, base_octave=-1):
    """Convert note pitch as string to MIDI note number.

    The accepted format is a note letter ``A``-``G`` (either case), any
    number of ``b`` (flat) / ``#`` (sharp) modifiers, an optional ``-``
    sign and a decimal octave number, e.g. ``'C4'``, ``'F#3'``, ``'Bb-1'``.
    A lowercase ``b`` directly after the letter is always read as a flat,
    so ``'bb3'`` is B-flat 3.

    Parameters
    ----------
    note : str
        Note name to convert.
    base_octave : int
        Octave number assigned to MIDI note 0. With the default of -1,
        ``'C-1'`` maps to 0 and ``'C4'`` maps to 60.

    Returns
    -------
    int
        MIDI note number in the range [0, 127].

    Raises
    ------
    ValueError
        If ``note`` does not match the expected format, or if the
        resulting note number falls outside [0, 127].
    """
    # Raw string: '\d' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    patt = re.match(r'^([CDEFGABcdefgab])([b#]*)(-?)(\d+)$', note)
    if patt is None:
        raise ValueError('invalid note string "{}"'.format(note))

    # Semitone offset of each natural note within an octave starting at C.
    base_map = {'C': 0,
                'D': 2,
                'E': 4,
                'F': 5,
                'G': 7,
                'A': 9,
                'B': 11}

    base, modifiers, sign, octave = patt.groups()
    semitone = base_map[base.upper()] + modifiers.count('#') - modifiers.count('b')
    octave_val = -int(octave) if sign == '-' else int(octave)
    note_num = semitone + 12 * (octave_val - base_octave)

    if note_num < 0 or note_num >= 128:
        raise ValueError('note string "{}" resulted in out-of-bounds note number {:d}'.format(note, note_num))
    return note_num
def note_num_to_str(note, base_octave=-1):
    """Convert MIDI note number to note pitch as string.

    Accidentals are always spelled with sharps (``'C#'``, never ``'Db'``).

    Parameters
    ----------
    note : int
        MIDI note number.
    base_octave : int
        Octave number assigned to MIDI note 0. With the default of -1,
        note 0 is ``'C-1'`` and note 60 is ``'C4'``.

    Returns
    -------
    str
        Note name followed by the octave number, e.g. ``'F#3'``.
    """
    # XXX: base_map should probably depend on key
    base_map = ['C',
                'C#',
                'D',
                'D#',
                'E',
                'F',
                'F#',
                'G',
                'G#',
                'A',
                'A#',
                'B']
    # Integer floor division keeps the arithmetic exact and avoids the
    # float round-trip through np.floor(note / 12).
    octave_index, pitch_class = divmod(note, 12)
    octave = int(octave_index) + base_octave
    return '{}{:d}'.format(base_map[pitch_class], octave)
def rock(audio):
    """Run the ``pyin:pyin`` Vamp plugin on ``audio`` and return its ``notes`` output.

    The plugin is invoked through ``vamp.collect`` with ``config.fs`` as
    the second positional argument and ``config.hopsize`` as the step size.

    Parameters
    ----------
    audio : array-like
        Audio samples passed straight to ``vamp.collect``.
        NOTE(review): presumably a mono float signal sampled at
        ``config.fs`` -- confirm against the caller.

    Returns
    -------
    object
        Whatever ``vamp.collect`` returns for the ``notes`` output.
    """
    # The original dropped into pdb here and discarded the result; the
    # breakpoint is removed so the function can run unattended.
    return vamp.collect(audio, config.fs, "pyin:pyin", step_size=config.hopsize, output="notes")
def process_lab_file(filename, stft_len, div_factor, pho_list):
    """Turn a phoneme label file into a per-frame feature matrix.

    Every non-blank line of the file must hold three whitespace-separated
    fields: start time, end time and phoneme label. Times are divided by
    ``div_factor`` and rounded to the nearest integer frame index. The
    labels ``'pau'``, ``'br'`` and ``'sil'`` are all mapped to ``'Sil'``.

    Parameters
    ----------
    filename : str
        Path of the label file.
    stft_len : int
        Unused; kept for interface compatibility.
    div_factor : float
        Divisor converting the file's time stamps to frame indices.
    pho_list : list of str
        Phoneme inventory; must contain ``'Sil'`` and every label that
        occurs in the file. Features store positions in this list.

    Returns
    -------
    ndarray
        Matrix of shape (nframes, 6), where ``nframes`` is the end frame
        of the last label. Column 0 is the index of the previous phoneme
        (``'Sil'`` before the first), column 1 the current phoneme,
        column 2 the next phoneme (``'Sil'`` after the last), and columns
        3-5 the coarse-coded relative position inside the current
        phoneme. An empty file yields an array of shape (0, 6).

    Raises
    ------
    ValueError
        If a non-blank line does not have exactly three fields, or a
        label (or ``'Sil'``) is missing from ``pho_list``.
    """
    # Context manager guarantees the handle is closed even if reading fails.
    with open(filename) as lab_f:
        lines = lab_f.readlines()

    phonemes = []
    for line in lines:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        st, end, phonote = fields
        st = int(np.round(float(st)/div_factor))
        en = int(np.round(float(end)/div_factor))
        if phonote in ('pau', 'br', 'sil'):
            phonote = 'Sil'
        phonemes.append([st, en, phonote])

    if not phonemes:
        # No labels means zero frames; avoid an opaque IndexError below.
        return np.zeros((0, 6))

    strings_p = np.zeros((phonemes[-1][1], 6))
    sil_index = pho_list.index('Sil')
    prev = sil_index

    for i, (st, en, label) in enumerate(phonemes):
        if i + 1 < len(phonemes):
            next_pho = pho_list.index(phonemes[i + 1][2])
        else:
            next_pho = sil_index
        value = pho_list.index(label)

        # The slice runs one frame past `en`. When segments are contiguous
        # that extra frame is overwritten by the following segment; for the
        # last segment it is clipped by the array bound.
        seg = slice(st, en + 1)
        context = coarse_code(np.linspace(0.0, 1.0, len(strings_p[seg, 0])))

        strings_p[seg, 0] = prev
        strings_p[seg, 1] = value
        strings_p[seg, 2] = next_pho
        strings_p[seg, 3:] = context
        prev = value

    return strings_p
def process_notes_file(filename, stft_len, div_factor):
    """Turn a note label file into per-frame note and phoneme feature matrices.

    Every non-blank line of the file must hold three whitespace-separated
    fields: start time, end time and a tag of the form
    ``NOTE/p:PH1-PH2-...``. ``NOTE`` is a note name understood by
    ``note_str_to_num`` or ``'xx'`` (stored as note number 0). Times are
    divided by ``config.hoptime`` and rounded, then divided by
    ``div_factor`` and truncated to an integer frame index.

    Parameters
    ----------
    filename : str
        Path of the notes file.
    stft_len : int
        Unused; kept for interface compatibility.
    div_factor : float
        Extra divisor applied to the frame indices.

    Returns
    -------
    strings_p : ndarray
        Matrix of shape (nframes, 6), where ``nframes`` is the end frame
        of the last entry. Column 0 is the ``config.notes`` index of the
        previous note (0 before the first), column 1 the current note,
        column 2 the next note (0 after the last), and columns 3-5 the
        coarse-coded relative position inside the current note.
    strings_c : ndarray
        Matrix of shape (nframes, 6). Column ``j + 1`` holds the
        ``config.phonemas`` index plus one of the ``j``-th phoneme sung
        on the note; column 0 and unused columns stay 0.

    For an empty file both matrices have shape (0, 6).

    Raises
    ------
    ValueError
        If a non-blank line is malformed, or a note / phoneme is missing
        from ``config.notes`` / ``config.phonemas``.
    IndexError
        If a note carries more than five phonemes (only columns 1-5 of
        ``strings_c`` are available).
    """
    # Context manager guarantees the handle is closed even if reading fails.
    with open(filename) as notes_f:
        lines = notes_f.readlines()

    segments = []
    for line in lines:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        st, end, phonote = fields
        note, combo = phonote.split('/p:')
        note_num = 0 if note == 'xx' else note_str_to_num(note)
        st = int(np.round(float(st)/config.hoptime)/div_factor)
        en = int(np.round(float(end)/config.hoptime)/div_factor)
        segments.append([st, en, note_num, combo])

    if not segments:
        # No entries means zero frames; avoid an opaque IndexError below.
        return np.zeros((0, 6)), np.zeros((0, 6))

    n_frames = segments[-1][1]
    strings_p = np.zeros((n_frames, 6))
    strings_c = np.zeros((n_frames, 6))
    prev = 0

    for i, (st, en, note_num, combo) in enumerate(segments):
        if i + 1 < len(segments):
            next_pho = config.notes.index(segments[i + 1][2])
        else:
            next_pho = 0
        value = config.notes.index(note_num)

        # The slice runs one frame past `en`. When segments are contiguous
        # that extra frame is overwritten by the following segment; for the
        # last segment it is clipped by the array bound.
        seg = slice(st, en + 1)
        context = coarse_code(np.linspace(0.0, 1.0, len(strings_p[seg, 0])))

        strings_p[seg, 1] = value
        strings_p[seg, 0] = prev
        strings_p[seg, 2] = next_pho
        # Phoneme ids are shifted by one so that 0 can mean "no phoneme".
        for j, p in enumerate(combo.split('-')):
            strings_c[seg, j + 1] = config.phonemas.index(p) + 1
        strings_p[seg, 3:] = context
        prev = value

    return strings_p, strings_c