# b_classi.py

# -*- coding: utf-8 -*-
"""B_CLassi.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/github/asigalov61/B-CLassi/blob/main/B_CLassi.ipynb

# B CLassi (ver. 1.0)

***

Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools

***

#### Project Los Angeles

#### Tegridy Code 2023

***

# (SETUP ENVIRONMENT)
"""

#@title Install all dependencies (run only once per session)

# NOTE: the `!` lines below are IPython/Colab shell escapes, not Python syntax;
# they only work when this file is executed as notebook cells.
!git clone https://github.com/asigalov61/B-CLassi
!pip install tqdm

# Commented out IPython magic to ensure Python compatibility.
#@title Import all needed modules

print('=' * 70)
print('Loading needed modules. Please wait...')

import os
import copy
import math
import statistics
import random

import shutil

from collections import Counter

from joblib import Parallel, delayed, parallel_config

from tqdm import tqdm

import matplotlib.pyplot as plt

print('=' * 70)
print('Creating I/O dirs...')

# Working directories used by the notebook, listed in creation order
# (parents before their children).
for io_dir in (
    '/content/GOOD',
    '/content/BAD',
    '/content/EVAL_IN',
    '/content/EVAL_OUT',
    '/content/EVAL_OUT/GOOD',
    '/content/EVAL_OUT/BAD',
):
    if not os.path.exists(io_dir):
        os.makedirs(io_dir)

print('=' * 70)
print('Loading TMIDIX module...')

# %cd /content/B-CLassi/

# The `%cd` magic above is disabled in this export, so the import below would
# only succeed when the current working directory happens to be the cloned
# repo. Make the repo importable explicitly instead. It is appended (not
# prepended) so an already-importable TMIDIX keeps priority.
import sys

if '/content/B-CLassi' not in sys.path:
    sys.path.append('/content/B-CLassi')

import TMIDIX

# %cd /content/

print('=' * 70)
print('Loading Tensorflow module...')

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.metrics import Precision, Recall, BinaryAccuracy
from tensorflow.keras.models import load_model

# Avoid OOM errors by setting GPU Memory Consumption Growth
# (applied to every GPU TensorFlow reports; the loop is a no-op when none are found).
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Bare expression: the returned device list is neither stored nor printed here.
tf.config.list_physical_devices('GPU')

print('=' * 70)
print('Done!')
print('Enjoy! :)')
print('=' * 70)

"""# (DOWNLOAD MIDI DATASET)"""

# Commented out IPython magic to ensure Python compatibility.
# @title Download and unzip sample MIDI classification dataset
# %cd /content/
# NOTE: the `!` lines are IPython/Colab shell escapes (not Python syntax). They
# download the sample dataset archive, extract it, and then delete the archive.
!wget https://github.com/asigalov61/Tegridy-MIDI-Dataset/raw/master/B-CLassi-MIDI-Dataset-CC-BY-NC-SA.zip
!unzip B-CLassi-MIDI-Dataset-CC-BY-NC-SA.zip
!rm B-CLassi-MIDI-Dataset-CC-BY-NC-SA.zip
# %cd /content/

"""# (LOAD MIDI PROCESSOR)"""

#@title TMIDIX MIDI Processor

print('=' * 70)
print('Loading TMIDIX MIDI Processor...')
print('=' * 70)

def group_single_elements(lst):
    """Merge each run of consecutive one-element sublists into a single list.

    Sublists whose length is not 1 are passed through unchanged (the same
    object is appended) and terminate the current run.

    Args:
        lst: iterable of sized sub-sequences.

    Returns:
        A new list in which every maximal run of adjacent single-element
        sublists has been flattened into one list.
    """
    grouped = []
    pending = []  # items collected from the current run of singletons

    for item in lst:
        if len(item) == 1:
            pending.extend(item)
            continue

        # A non-singleton closes the current run before being passed through.
        if pending:
            grouped.append(pending)
            pending = []
        grouped.append(item)

    if pending:
        grouped.append(pending)

    return grouped

def TMIDIX_MIDI_Processor(midi_file):
    """Convert one MIDI file into a flat B CLassi token sequence.

    Token vocabulary produced by this function:
        1..127    pitches of notes on channel 9 (drums)
        129..255  pitches of notes on every other channel (pitch + 128)
        256       marker that precedes a single note or a run of notes that
                  start at different times
        257       marker that precedes a group of notes sharing one start time

    Args:
        midi_file: path to the MIDI file.

    Returns:
        A ``(tokens, file_name)`` tuple, where ``file_name`` is the base name
        of ``midi_file``; or ``None`` when the file is larger than 1,000,000
        bytes, contains no usable notes, has negative start times/durations,
        or cannot be read/parsed.
    """
    try:
        fn = os.path.basename(midi_file)

        # Filtering out GIANT MIDIs
        if os.path.getsize(midi_file) > 1000000:
            return None

        #=======================================================
        # START PROCESSING

        # Read through a context manager so the handle is always closed
        with open(midi_file, 'rb') as midi_handle:
            raw_midi = midi_handle.read()

        # Converting MIDI to ms score with TMIDIX
        score = TMIDIX.midi2single_track_ms_score(raw_midi, recalculate_channels=False)

        # score[0] is skipped; the remaining entries are event lists
        events = [
            event
            for track in score[1:]
            for event in track
            if event[0] == 'note' or event[0] == 'patch_change'
        ]
        events.sort(key=lambda x: x[1])

        # INSTRUMENTS CONVERSION CYCLE
        patches = [0] * 16  # last patch seen per channel
        notes = []

        for event in events:
            if event[0] == 'patch_change':
                patches[event[2]] = event[3]
                continue

            # Tag the note with the patch currently active on its channel
            event.extend([patches[event[3]]])

            # Drop a note that repeats the channel/pitch of the previously
            # kept note at the very same start time
            if notes and event[1] == notes[-1][1] and [event[3], event[4]] == notes[-1][3:5]:
                continue

            notes.append(event)

        if not notes:
            return None

        # Reject files containing negative start times or durations
        if min(e[1] for e in notes) < 0 or min(e[2] for e in notes) < 0:
            return None

        #=======================================================
        # MAIN PROCESSING CYCLE

        # Order by start-time, then pitch (descending), then patch
        notes.sort(key=lambda x: (x[1], -x[4], x[6]))

        for e in notes:
            channel = max(0, min(15, e[3]))
            pitch = max(1, min(127, e[4]))

            # Non-drum pitches are shifted above the drum range
            e[4] = pitch if channel == 9 else pitch + 128

        # Re-order on the shifted pitches (stable, so ties keep their order)
        notes.sort(key=lambda x: (x[1], -x[4]))

        # Group notes that share a start time
        chords = []
        for e in notes:
            if chords and chords[-1][-1][1] == e[1]:
                chords[-1].append(e)
            else:
                chords.append([e])

        # Consecutive single-note groups are merged into melody runs
        tokens = []
        for group in group_single_elements(chords):
            same_start = len({note[1] for note in group}) == 1
            marker = 257 if len(group) > 1 and same_start else 256
            tokens.append(marker)
            tokens.extend(note[4] for note in group)

        return tokens, fn

    except Exception:
        # Best effort: any unreadable or malformed file is skipped
        return None

print('Done!')
print('=' * 70)

"""# (PROCESS GOOD MIDI DATA)"""

#@title Save file list
###########

print('=' * 70)
print('Loading MIDI files...')
print('This may take a while on a large dataset in particular.')

dataset_addr = "/content/GOOD"

# Collect every file found below the dataset directory (recursively).
filez = []
for root, _subdirs, names in os.walk(dataset_addr):
    for name in names:
        filez.append(os.path.join(root, name))
print('=' * 70)

if filez:
    print('Randomizing file list...')
    random.shuffle(filez)
    print('Done!')
    print('=' * 70)
    print('Total files:', len(filez))
    print('=' * 70)

else:
    print('Could not find any MIDI files. Please check Dataset dir...')
    print('=' * 70)

#@title Process MIDIs with TMIDIX MIDI processor

print('=' * 70)
print('TMIDIX MIDI Processor')
print('=' * 70)
print('Starting up...')
print('=' * 70)

###########

melody_chords_f = []

print('Processing MIDI files. Please wait...')
print('=' * 70)

# Files are handed to the thread pool sixteen at a time; files the processor
# rejected (None) are skipped.
for batch_start in tqdm(range(0, len(filez), 16)):

    file_batch = filez[batch_start:batch_start + 16]

    with parallel_config(backend='threading', n_jobs=4, verbose=0):
        output = Parallel()(delayed(TMIDIX_MIDI_Processor)(path) for path in file_batch)

    melody_chords_f.extend(result for result in output if result is not None)

print('Done!')
print('=' * 70)

"""# (PROCESS BAD MIDI DATA)"""

#@title Save file list
###########

print('=' * 70)
print('Loading MIDI files...')
print('This may take a while on a large dataset in particular.')

dataset_addr = "/content/BAD"

# Collect every file found below the dataset directory (recursively).
filez1 = []
for root, _subdirs, names in os.walk(dataset_addr):
    for name in names:
        filez1.append(os.path.join(root, name))
print('=' * 70)

if filez1:
    print('Randomizing file list...')
    random.shuffle(filez1)
    print('Done!')
    print('=' * 70)
    print('Total files:', len(filez1))
    print('=' * 70)

else:
    print('Could not find any MIDI files. Please check Dataset dir...')
    print('=' * 70)

#@title Process MIDIs with TMIDIX MIDI processor

print('=' * 70)
print('TMIDIX MIDI Processor')
print('=' * 70)
print('Starting up...')
print('=' * 70)

###########

melody_chords_f1 = []

print('Processing MIDI files. Please wait...')
print('=' * 70)

# Files are handed to the thread pool sixteen at a time; files the processor
# rejected (None) are skipped.
for batch_start in tqdm(range(0, len(filez1), 16)):

    file_batch = filez1[batch_start:batch_start + 16]

    with parallel_config(backend='threading', n_jobs=4, verbose=0):
        output = Parallel()(delayed(TMIDIX_MIDI_Processor)(path) for path in file_batch)

    melody_chords_f1.extend(result for result in output if result is not None)

print('Done!')
print('=' * 70)

"""# (SAVE/LOAD ALL MIDI DATA)"""

# @title Save
# Persist both processed collections in one file: GOOD data first, BAD data second.
print('=' * 70)
TMIDIX.Tegridy_Any_Pickle_File_Writer([melody_chords_f, melody_chords_f1], '/content/B_CLassi_ALL_MIDI_DATA')
print('=' * 70)

# @title Load
# Restore the two collections in the same order they were written above.
# NOTE(review): the helper name suggests pickle is used for storage; if so,
# only load files you created yourself. Confirm against the TMIDIX module.
print('=' * 70)
melody_chords_f, melody_chords_f1 = TMIDIX.Tegridy_Any_Pickle_File_Reader('/content/B_CLassi_ALL_MIDI_DATA')
print('Done!')
print('=' * 70)

"""# (PREP ALL MIDI DATA)"""

# @title Prep data
classifier_seq_length_in_notes = 640 # @param {type:"slider", min:128, max:2048, step:128}
composition_sampling_steps_in_notes = 32 # @param {type:"slider", min:8, max:64, step:8}
equalize_data_splits = False # @param {type:"boolean"}

#===============================================================================

SEQ_LEN = classifier_seq_length_in_notes # In notes
STEP = composition_sampling_steps_in_notes # Composition sampling in notes
EQUALIZE_DATA_SPLITS = equalize_data_splits # Balancing equalization

#===============================================================================

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 'N/A' when the denominator is zero."""
    return numerator / denominator if denominator else 'N/A'

gdata = []
bdata = []

data = []

print('=' * 70)
print('Prepping data...')
print('=' * 70)

# Each sample is [window, [label]], where a window is SEQ_LEN consecutive
# tokens taken every STEP tokens. Only full-length windows are kept, so the
# last usable start index is len(tokens) - SEQ_LEN.

# Good data == 1
for m in tqdm(melody_chords_f):
  tokens = m[0]
  for i in range(0, len(tokens) - SEQ_LEN + 1, STEP):
    gdata.append([tokens[i:i+SEQ_LEN], [1]])

# Bad data == 0
for m in tqdm(melody_chords_f1):
  tokens = m[0]
  for i in range(0, len(tokens) - SEQ_LEN + 1, STEP):
    bdata.append([tokens[i:i+SEQ_LEN], [0]])

good = len(gdata)
bad = len(bdata)

random.shuffle(gdata)
random.shuffle(bdata)

if EQUALIZE_DATA_SPLITS:
  # Truncate the larger class so both classes contribute equally
  equal_data_size = min(len(gdata), len(bdata))
  data = gdata[:equal_data_size] + bdata[:equal_data_size]
  good = bad = equal_data_size

else:
  data = gdata + bdata

random.shuffle(data)

print('Done!')
print('=' * 70)

if not data:
  print('WARNING: No samples were produced. Check the GOOD/BAD dirs and the sequence length.')
  print('=' * 70)

# Ratios are guarded so an empty class or empty dataset reports 'N/A'
# instead of raising ZeroDivisionError.
print('Total data size:', len(data))
print('Good data size:', good, '/', _safe_ratio(good, len(data)))
print('Bad data size:', bad, '/',  _safe_ratio(bad, len(data)))
print('Good to bad ratio:', _safe_ratio(good, bad))
print('=' * 70)

# @title Create training datasets and their splits
# Notebook form parameters: batch size and the fraction of batches assigned
# to each of the train / validation / test splits.
training_batch_size = 32 # @param {type:"slider", min:4, max:128, step:4}
train_dataset_size_ratio = 0.9 # @param {type:"slider", min:0.66, max:0.96, step:0.01}
validation_dataset_size_ratio = 0.02 # @param {type:"slider", min:0.01, max:0.3, step:0.01}
test_dataset_size_ratio = 0.02 # @param {type:"slider", min:0.01, max:0.3, step:0.01}

#===============================================================================

# Upper-case aliases of the form parameters above.
BATCH_SIZE = training_batch_size

TRAIN_DATASET_SIZE_RATIO = train_dataset_size_ratio
VAL_DATASET_SIZE_RATIO = validation_dataset_size_ratio
TEST_DATASET_SIZE_RATIO = test_dataset_size_ratio

#===============================================================================

def create_dataset(data, batch_size=16):
    """Build a batched tf.data.Dataset from ``[features, [label]]`` samples.

    Args:
        data: sequence of samples; ``sample[0]`` is the feature sequence and
            ``sample[1][0]`` is its label.
        batch_size: number of samples per batch.

    Returns:
        A dataset of ``(features, labels)`` batches with features as float32
        and labels as int32. A trailing incomplete batch is dropped.
    """
    features = []
    labels = []
    for sample in data:
        features.append(sample[0])
        labels.append(sample[1][0])

    feature_tensor = tf.constant(features, dtype=tf.float32)
    label_tensor = tf.constant(labels, dtype=tf.int32)

    samples = tf.data.Dataset.from_tensor_slices((feature_tensor, label_tensor))

    return samples.batch(batch_size, drop_remainder=True)

#===============================================================================

print('=' * 70)
print('Creating B Classi Training Datasets...')

dataset = create_dataset(data, BATCH_SIZE)

# Split sizes are counted in batches. Each split uses its own ratio
# (the validation and test ratios were previously swapped).
train_size = int(len(dataset) * TRAIN_DATASET_SIZE_RATIO)
val_size = int(len(dataset) * VAL_DATASET_SIZE_RATIO)
test_size = int(len(dataset) * TEST_DATASET_SIZE_RATIO)

# Consecutive, non-overlapping slices: train, then validation, then test.
# Batches beyond train_size + val_size + test_size are left unused.
train = dataset.take(train_size)
val = dataset.skip(train_size).take(val_size)
test = dataset.skip(train_size+val_size).take(test_size)

print('Done!')
print('=' * 70)

"""# (CREATE MODEL)"""

# @title Create the model
model_size = 1024 # @param {type:"slider", min:512, max:2048, step:512}
model_dropout = 0.5 # @param {type:"slider", min:0, max:1, step:0.1}
model_kernel_size = 4 # @param {type:"slider", min:2, max:16, step:2}
model_pool_size = 2 # @param {type:"slider", min:2, max:8, step:1}

#=======================================================

MODEL_SIZE = model_size
DROPOUT = model_dropout
KERNEL_SIZE = model_kernel_size
POOL_SIZE = model_pool_size

#=======================================================

print('=' * 70)
print('Creating model...')

# Define the model
model = tf.keras.models.Sequential()

# Five identical stages: Conv1D -> BatchNormalization -> Dropout -> MaxPooling1D.
# The number of filters halves at every stage; only the first Conv1D declares
# the input shape.
stage_filters = [MODEL_SIZE] + [int(MODEL_SIZE // divisor) for divisor in (2, 4, 8, 16)]

for stage_index, filters in enumerate(stage_filters):
    conv_options = {'activation': 'relu'}
    if stage_index == 0:
        conv_options['input_shape'] = (SEQ_LEN, 1)

    model.add(layers.Conv1D(filters, KERNEL_SIZE, 1, **conv_options))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(DROPOUT))
    model.add(layers.MaxPooling1D(pool_size=POOL_SIZE))

# Classification head: flatten, one hidden dense layer, sigmoid output
model.add(layers.Flatten())
model.add(layers.Dense(MODEL_SIZE, activation='relu'))
model.add(layers.Dropout(DROPOUT))
model.add(layers.Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Tensorboard
logdir='LOGS'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

# Add early stopping
es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

print('=' * 70)
print('Done!')
print('=' * 70)

model.summary()
print('=' * 70)

"""# (TRAIN)"""

# @title Train the model
number_of_training_epochs = 10 # @param {type:"slider", min:1, max:15, step:1}

print('=' * 70)
print('Training...Please wait...')
print('=' * 70)

# Early stopping and TensorBoard logging run alongside training.
training_callbacks = [es_callback, tensorboard_callback]

hist = model.fit(
    train,
    validation_data=val,
    batch_size=BATCH_SIZE,
    epochs=number_of_training_epochs,
    callbacks=training_callbacks,
)

print('=' * 70)
print('Done!')
print('=' * 70)

"""# (PLOT TRAINING RESULTS)"""

# @title Plot performance

print('=' * 70)
print('Plotting training results...')
print('=' * 70)

# One figure per metric: training curve in teal, validation curve in orange.
for plot_title, metric_name in (('Loss', 'loss'), ('Accuracy', 'accuracy')):
    val_metric_name = 'val_' + metric_name

    fig = plt.figure()
    plt.plot(hist.history[metric_name], color='teal', label=metric_name)
    plt.plot(hist.history[val_metric_name], color='orange', label=val_metric_name)
    fig.suptitle(plot_title, fontsize=20)
    plt.legend(loc="upper left")
    plt.show()

    print('=' * 70)

"""# (EVAL MODEL)"""

# @title Eval model

pre = Precision()
re = Recall()
acc = BinaryAccuracy()

print('=' * 70)
print('Evaluating model...')
print('=' * 70)

# Accumulate all three metrics over every batch of the test split.
for X, y in tqdm(test):
    yhat = model.predict(X, verbose=0)
    for metric in (pre, re, acc):
        metric.update_state(y, yhat)

print('Done!')
print('=' * 70)
for metric_label, metric in (
    ('Model Precision:', pre),
    ('Model Recall:', re),
    ('Model Accuracy:', acc),
):
    print(metric_label, metric.result().numpy())
print('=' * 70)

"""# (SAVE/LOAD MODEL)"""

#@title Save model

print('=' * 70)
print('Saving model...')
# The measured precision (rounded to 4 places) is embedded in the file name.
precision_tag = str(round(pre.result().numpy(), 4))
model_file_name = 'B_CLassi_Pre_Trained_Model_' + precision_tag + '_Precision' + '.keras'
model.save(os.path.join('/content', model_file_name))
print('=' * 70)
print('Done!')
print('=' * 70)

# @title Load model
full_path_to_trained_model = "/content/B_CLassi_Pre_Trained_Model_0.9942_Precision.keras" # @param {type:"string"}
print('=' * 70)
print('Loading model...')
# Replaces the in-memory model with the one stored at the given path.
model = load_model(full_path_to_trained_model)
print('=' * 70)
print('Done!')
print('=' * 70)

"""# (CLASSIFY)

# (CUSTOM MIDI FILE)
"""

# @title Load custom MIDI
full_path_to_custom_MIDI_file = "/content/B-CLassi/Come To My Window.mid" # @param {type:"string"}
sampling_step_in_notes = 32 # @param {type:"slider", min:8, max:128, step:8}

print('=' * 70)
print('Loading custom MIDI file...')
print('=' * 70)

#=======================================================
# START PROCESSING

# The processor returns None for files it cannot use; fail with a clear
# message instead of an obscure "NoneType is not subscriptable" below.
processed_midi = TMIDIX_MIDI_Processor(full_path_to_custom_MIDI_file)

if processed_midi is None:
    raise ValueError('Could not process MIDI file: ' + full_path_to_custom_MIDI_file)

test_midi = [processed_midi]

test_data = []

# Cut the token sequence into full-length windows (no labels at inference time)
for m in tqdm(test_midi):
  for i in range(0, len(m[0]) - SEQ_LEN + 1, sampling_step_in_notes):
    test_data.append(m[0][i:i+SEQ_LEN])

# A file shorter than SEQ_LEN tokens yields no windows and cannot be classified
if not test_data:
    raise ValueError('MIDI file is too short for the classifier sequence length: ' + full_path_to_custom_MIDI_file)

features = list(test_data)

# Convert the lists to TensorFlow tensors
tensor_features = tf.constant(features, dtype=tf.float32)

# Create a tf.data.Dataset from the tensors
test_dataset = tf.data.Dataset.from_tensor_slices(tensor_features)

# Batch the dataset (the last, possibly smaller, batch is kept)
batched_test_dataset = test_dataset.batch(BATCH_SIZE)

print('Done!')
print('=' * 70)

# @title Classify
good_classification_threshold = 0.5 # @param {type:"slider", min:0.1, max:1, step:0.05}

print('=' * 70)
print('Classifying...')
print('-' * 70)

yhat = model.predict(batched_test_dataset)

# One score per window; the model emits a single value per sample.
window_labels = [row[0] for row in yhat.tolist()]

avg_label = round(sum(window_labels) / len(window_labels), 4)

good_labels = [label for label in window_labels if label >= good_classification_threshold]
bad_labels = [label for label in window_labels if label < good_classification_threshold]

print('=' * 70)

predicted_class = 'GOOD' if avg_label > good_classification_threshold else 'BAD'
print('Predicted class is ' + predicted_class)

print('=' * 70)
print('First five labels:', [round(label, 4) for label in good_labels[:5]])
print('=' * 70)
print('Total number of classification labels:', len(window_labels))
print('=' * 70)
print('TOtal number of good classification labels:', len(good_labels))
print('TOtal number of bad classification labels:', len(bad_labels))
print('=' * 70)
print('Min/Max classification labels:', round(min(window_labels), 4), '/', round(max(window_labels), 4))
print('=' * 70)
print('Average classification label:', round(avg_label, 4))
print('=' * 70)

"""# (CUSTOM MIDI DATASET)"""

#@title Save file list

#@markdown Upload your custom MIDI dataset into EVAL_IN directory

###########

print('=' * 70)
print('Loading MIDI files...')
print('This may take a while on a large dataset in particular.')

dataset_addr = "/content/EVAL_IN"

# Collect every file found below the dataset directory (recursively).
filez2 = []
for root, _subdirs, names in os.walk(dataset_addr):
    for name in names:
        filez2.append(os.path.join(root, name))
print('=' * 70)

if filez2:
    print('Randomizing file list...')
    random.shuffle(filez2)
    print('Done!')
    print('=' * 70)
    print('Total files:', len(filez2))
    print('=' * 70)

else:
    print('Could not find any MIDI files. Please check Dataset dir...')
    print('=' * 70)

#@title Process MIDIs with TMIDIX MIDI processor
sampling_step_in_notes = 32 # @param {type:"slider", min:8, max:128, step:8}

print('=' * 70)
print('TMIDIX MIDI Processor')
print('=' * 70)
print('Starting up...')
print('=' * 70)

###########

# NOTE: this rebinds melody_chords_f, replacing the GOOD training data held in memory.
melody_chords_f = []

print('Processing MIDI files. Please wait...')
print('=' * 70)

# Files are handed to the thread pool sixteen at a time; files the processor
# rejected (None) are skipped.
for batch_start in tqdm(range(0, len(filez2), 16)):

    file_batch = filez2[batch_start:batch_start + 16]

    with parallel_config(backend='threading', n_jobs=4, verbose=0):
        output = Parallel()(delayed(TMIDIX_MIDI_Processor)(path) for path in file_batch)

    melody_chords_f.extend(result for result in output if result is not None)

print('Done!')
print('=' * 70)
print('Finalizng MIDI data...')
print('=' * 70)

all_test_data = []

# Per file: every full-length window paired with the file name. Files that
# yield no full-length window are left out.
for m in tqdm(melody_chords_f):
    window_starts = range(0, len(m[0]), sampling_step_in_notes)
    test_data = [
        m[0][start:start + SEQ_LEN]
        for start in window_starts
        if len(m[0][start:start + SEQ_LEN]) == SEQ_LEN
    ]
    if test_data:
        all_test_data.append([test_data, m[1]])

print('Done!')
print('=' * 70)
print('Total number of processed files:', len(all_test_data))
print('=' * 70)

"""# (SAVE/LOAD CUSTOM MIDI DATASET DATA)"""

# @title Save
# Persist the per-file windows prepared for classification.
print('=' * 70)
TMIDIX.Tegridy_Any_Pickle_File_Writer(all_test_data, '/content/B_CLassi_CUSTOM_MIDI_DATASET_DATA')
print('=' * 70)

# @title Load
# Restore the per-file windows written above.
# NOTE(review): the helper name suggests pickle is used for storage; if so,
# only load files you created yourself. Confirm against the TMIDIX module.
print('=' * 70)
all_test_data = TMIDIX.Tegridy_Any_Pickle_File_Reader('/content/B_CLassi_CUSTOM_MIDI_DATASET_DATA')
print('Done!')
print('=' * 70)

#@title Classify
good_classification_threshold = 0.5 # @param {type:"slider", min:0.1, max:1, step:0.05}

print('=' * 70)
print('Classifying...')
print('=' * 70)

# Each entry: [file name, good_or_bad (1/0), average, minimum, maximum score]
predictions = []

for a in tqdm(all_test_data):

  features = list(a[0])
  tensor_features = tf.constant(features, dtype=tf.float32)
  test_dataset = tf.data.Dataset.from_tensor_slices(tensor_features)
  batched_test_dataset = test_dataset.batch(BATCH_SIZE)

  yhat = model.predict(batched_test_dataset, verbose=0)

  # One score per window of the file
  yhat_list = [y[0] for y in yhat.tolist()]

  avg_label = round((sum(yhat_list) / len(yhat_list)), 4)
  min_label, max_label = round(min(yhat_list), 4), round(max(yhat_list), 4)

  # Use the configured threshold (it was hard-coded to 0.5 before, so the
  # slider above had no effect on the per-file label)
  good_or_bad = 1 if avg_label > good_classification_threshold else 0

  predictions.append([a[1], good_or_bad, avg_label, min_label, max_label])

good_count = sum(1 for p in predictions if p[1] == 1)

print('Done!')
print('=' * 70)
print('Total number of classified MIDI:', len(predictions))
print('=' * 70)
print('TOtal number of good MIDIs:', good_count)
print('TOtal number of bad MIDIs:', len(predictions) - good_count)
print('=' * 70)

# @title Copy classified MIDI splits into the output dir (EVAL_OUT)
good_classification_threshold = 0.5 # @param {type:"slider", min:0.1, max:1, step:0.05}

print('=' * 70)
print('Copying...')
print('=' * 70)

GOOD_THRESHOLD = good_classification_threshold

# Predictions carry only base file names, while EVAL_IN is scanned
# recursively, so files inside subdirectories cannot be located by
# '/content/EVAL_IN/' + name alone. Map each base name to its real path.
# When a name occurs more than once, the first one found by the top-down
# walk is used.
eval_in_paths = {}
for root, _subdirs, names in os.walk('/content/EVAL_IN'):
  for name in names:
    eval_in_paths.setdefault(name, os.path.join(root, name))

good = 0
bad = 0

for p in tqdm(predictions):

  # Fall back to the flat layout if the file is not found by the walk
  src = eval_in_paths.get(p[0], '/content/EVAL_IN/' + p[0])

  # p[2] is the average score of the file
  if p[2] > GOOD_THRESHOLD:
    dest = '/content/EVAL_OUT/GOOD/' + p[0]
    good += 1
  else:
    dest = '/content/EVAL_OUT/BAD/' + p[0]
    bad += 1


  shutil.copy2(src, dest)

print('Done!')
print('=' * 70)
print('Good/bad files count:', good, '/', bad)
print('=' * 70)