# -*- coding: utf-8 -*-
"""
Created on Thu Aug 23 00:40:54 2018
@author: kbalu
"""
# Importing Libraries
import glob

import numpy as np
import tensorflow as tf
from tqdm import tqdm

import midi_manipulation
def get_songs(path):
    """Load every MIDI file in `path` and convert it to a note-state matrix."""
    files = glob.glob('{}/*.mid*'.format(path))
    songs = []
    for f in tqdm(files):
        song = np.array(midi_manipulation.midiToNoteStateMatrix(f))
        if song.shape[0] > 50:  # keep only songs long enough to be useful for training
            songs.append(song)
    return songs
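#Illustrative sanity check: each entry of songs should be a matrix of shape
#(timesteps_in_song, 2*note_range) containing 0s and 1s. Uncomment to inspect:
#if songs:
#    print("first song shape:", songs[0].shape)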
songs = get_songs('arr-songs-midi')  # directory of training MIDI files
print("{} songs processed".format(len(songs)))
# Step 1 - Hyperparameter setup
lowest_note = midi_manipulation.lowerBound  # index of the lowest note we model
highest_note = midi_manipulation.upperBound  # index of the highest note we model
note_range = highest_note - lowest_note  # number of distinct notes
num_timesteps = 15  # number of timesteps in each training example
n_visible = 2*note_range*num_timesteps  # size of the visible layer
n_hidden = 50  # size of the hidden layer
num_epochs = 200  # number of training epochs; each epoch passes through the entire data set
batch_size = 100  # number of training examples sent through the RBM at a time
lr = tf.constant(0.005, tf.float32)  # learning rate of the model
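#Worked example: assuming the common midi_manipulation defaults of
#lowerBound = 24 and upperBound = 102, note_range = 78, so each training
#vector has n_visible = 2 * 78 * 15 = 2340 elements (two bits per note per
#timestep: one for the note sounding and one for its onset).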
### Variables:
# Next, let's look at the variables we're going to use:
x = tf.placeholder(tf.float32, [None, n_visible], name="x") #The placeholder variable that holds our data
W = tf.Variable(tf.random_normal([n_visible, n_hidden], stddev=0.01), name="W") #The weight matrix that stores the edge weights
bh = tf.Variable(tf.zeros([1, n_hidden], tf.float32), name="bh") #The bias vector for the hidden layer
bv = tf.Variable(tf.zeros([1, n_visible], tf.float32), name="bv") #The bias vector for the visible layer
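#For reference, these parameters define the RBM energy
#  E(v, h) = -v W h^T - v bv^T - h bh^T
#(v and h as row vectors); lower energy means a more probable configuration.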
#### Helper functions
#This function lets us easily sample from a vector of probabilities
def sample(probs):
    #Takes in a vector of probabilities, and returns a random vector of 0s and 1s sampled from the input vector
    return tf.floor(probs + tf.random_uniform(tf.shape(probs), 0, 1))
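#Why the floor trick works: with u drawn uniformly from [0, 1),
#floor(p + u) equals 1 exactly when u >= 1 - p, which happens with
#probability p, so each element is an independent Bernoulli(p) draw.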
#This function runs the Gibbs chain. We will call this function in two places:
# - when we define the training update step
# - when we sample our music segments from the trained RBM
def gibbs_sample(k):
    #Runs a k-step Gibbs chain to sample from the probability distribution of the RBM defined by W, bh, bv
    def gibbs_step(count, k, xk):
        #Runs a single Gibbs step. The visible values are initialized to xk
        hk = sample(tf.sigmoid(tf.matmul(xk, W) + bh)) #Propagate the visible values to sample the hidden values
        xk = sample(tf.sigmoid(tf.matmul(hk, tf.transpose(W)) + bv)) #Propagate the hidden values to sample the visible values
        return count + 1, k, xk
    #Run gibbs_step for k iterations
    ct = tf.constant(0) #counter
    [_, _, x_sample] = tf.while_loop(lambda count, num_iter, *args: count < num_iter,
                                     gibbs_step, [ct, tf.constant(k), x])
    #This is not strictly necessary in this implementation, but if you want to adapt this code to use one of
    #TensorFlow's optimizers, you need this to stop TensorFlow from propagating gradients back through the Gibbs steps
    x_sample = tf.stop_gradient(x_sample)
    return x_sample
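#Note: each call to gibbs_sample adds new ops to the graph, so the script calls
#it exactly twice: once below with k=1 to build the contrastive-divergence (CD-1)
#training update, and once more after training to generate new segments.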
### Training Update Code
# Now we implement the contrastive divergence algorithm. First, we get the samples of x and h from the probability distribution
#The sample of x
x_sample = gibbs_sample(1)
#The sample of the hidden nodes, starting from the visible state of x
h = sample(tf.sigmoid(tf.matmul(x, W) + bh))
#The sample of the hidden nodes, starting from the visible state of x_sample
h_sample = sample(tf.sigmoid(tf.matmul(x_sample, W) + bh))
#Next, we update the values of W, bh, and bv, based on the difference between the samples that we drew and the original values
size_bt = tf.cast(tf.shape(x)[0], tf.float32)
W_adder = tf.multiply(lr/size_bt, tf.subtract(tf.matmul(tf.transpose(x), h), tf.matmul(tf.transpose(x_sample), h_sample)))
bv_adder = tf.multiply(lr/size_bt, tf.reduce_sum(tf.subtract(x, x_sample), 0, True))
bh_adder = tf.multiply(lr/size_bt, tf.reduce_sum(tf.subtract(h, h_sample), 0, True))
#When we do sess.run(updt), TensorFlow will run all 3 update steps
updt = [W.assign_add(W_adder), bv.assign_add(bv_adder), bh.assign_add(bh_adder)]
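#In equation form, with N the batch size and (x', h') the negative-phase samples
#from the Gibbs chain, the updates above are:
#  W  <- W  + (lr / N) * (x^T h - x'^T h')
#  bv <- bv + (lr / N) * sum over the batch of (x - x')
#  bh <- bh + (lr / N) * sum over the batch of (h - h')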
# Now it's time to start a session and run the graph!
with tf.Session() as sess:
    #First, we train the model
    #initialize the variables of the model
    init = tf.global_variables_initializer()
    sess.run(init)
    #Run through all of the training data num_epochs times
    for epoch in tqdm(range(num_epochs)):
        for song in songs:
            #The songs are stored in a time x notes format. The size of each song is timesteps_in_song x 2*note_range
            #Here we reshape the songs so that each training example is a vector with num_timesteps x 2*note_range elements
            song = np.array(song)
            song = song[:int(np.floor(song.shape[0]/num_timesteps)*num_timesteps)]
            song = np.reshape(song, [song.shape[0]//num_timesteps, song.shape[1]*num_timesteps])
            #Train the RBM on batch_size examples at a time
            for i in range(0, len(song), batch_size):
                tr_x = song[i:i+batch_size]
                sess.run(updt, feed_dict={x: tr_x})
    #Now the model is fully trained, so let's make some music!
    #Run a Gibbs chain where the visible nodes are initialized to 0
    samples = gibbs_sample(1).eval(session=sess, feed_dict={x: np.zeros((50, n_visible))})
    for i in range(samples.shape[0]):
        if not samples[i, :].any():
            continue #skip empty (all-zero) samples
        #Here we reshape the vector to be time x notes, and then save the vector as a midi file
        S = np.reshape(samples[i, :], (num_timesteps, 2*note_range))
        midi_manipulation.noteStateMatrixToMidi(S, "generated_chord_{}".format(i))
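#Optional extension (a sketch, not part of the original script): each saved file
#is only num_timesteps (15) steps long, and noteStateMatrixToMidi in the common
#midi_manipulation module appends a .mid extension. For a longer piece, the
#non-empty samples could be stacked before saving; uncomment to try:
#parts = [np.reshape(s, (num_timesteps, 2*note_range)) for s in samples if s.any()]
#if parts:
#    midi_manipulation.noteStateMatrixToMidi(np.vstack(parts), "generated_long")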