-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathms21_generate_yaml.py
331 lines (280 loc) · 12.8 KB
/
ms21_generate_yaml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
# from multiprocessing import Process
from multiprocessing.dummy import Pool as ThreadPool
import yaml
import re
import os, errno
import soundfile as sf
import numpy as np
import pandas as pd
import json
import pyloudnorm as pyln
import shutil
import sys
from rule_based_mixing import inst_spec_mix
# Reference: https://github.com/SiddGururani/mixing_secrets/blob/master/generate_yaml.py
def gen_yaml(directory, move_raw=True):
    """Build the stems, the mix and the ``_METADATA.yaml`` file for one song.

    Relies on the module-level names ``anno_file_path``, ``hierarchy_path``,
    ``base_path`` and ``save_path``; they must be assigned before this is
    called (the ``__main__`` section of this script does so).

    Args:
        directory: Song folder name. It is matched against the
            ``Music_Title`` column of the annotation CSV and is also used as
            the song ID in every generated file and folder name.
        move_raw: When true, the raw ``.wav`` tracks are copied into the
            output ``<ID>_RAW`` folder.

    Raises:
        IndexError: If the annotation CSV has no row for ``directory``.
    """
    csv_anno = pd.read_csv(anno_file_path)
    with open(hierarchy_path, 'r') as hierarchy_fp:
        hierarchy_file = json.load(hierarchy_fp)
    print(directory)

    track_df = csv_anno.loc[csv_anno['Music_Title'] == directory]
    if track_df.empty:
        # Same exception type the bare ``.values[0]`` lookup would raise,
        # but with a message that names the missing song.
        raise IndexError(
            f"no annotation row with Music_Title == {directory!r} in {anno_file_path}"
        )
    artist = track_df['Artist'].values[0]
    song = track_df['Track_Name'].values[0]
    ID = directory

    yaml_obj = init_medley_yaml()
    yaml_obj['csv_anno_path'] = anno_file_path
    yaml_obj['hieararchy_file_path'] = hierarchy_path
    yaml_obj['artist'] = artist
    yaml_obj['genre'] = track_df['Sub_Genre'].values[0]
    yaml_obj['title'] = song

    # A Vocal_Quality of 1 is recorded as "no bleed"; anything else as bleed.
    # NOTE(review): the template from init_medley_yaml() has a 'has_bleed'
    # key that stays empty, while this writes a separate 'vocal_has_bleed'
    # key -- confirm which one consumers read.
    if track_df["Vocal_Quality"].values[0] == 1:
        yaml_obj['vocal_has_bleed'] = 'no'
    else:
        yaml_obj['vocal_has_bleed'] = 'yes'

    # The song counts as instrumental when every column from Lead_Vocal
    # through Backing_Vocal holds the empty-list marker '[]'.
    vocal = np.any([i != '[]' for i in track_df.T.loc["Lead_Vocal":"Backing_Vocal"].values])
    yaml_obj['instrumental'] = 'no' if vocal else 'yes'

    yaml_obj['mix_filename'] = ID+'_MIX.wav'
    yaml_obj['origin'] = 'Mixing Secrets'
    yaml_obj['raw_dir'] = ID+'_RAW'
    yaml_obj['stem_dir'] = ID+'_STEMS'
    yaml_obj['version'] = '3.0'

    song_dir = os.path.join(save_path, ID)
    raw_out_dir = os.path.join(song_dir, ID+'_RAW')
    stems_dir = os.path.join(song_dir, ID+'_STEMS')
    inst_dir = os.path.join(stems_dir, 'Inst')
    musdb_dir = os.path.join(stems_dir, 'MUSDB')
    # Raw tracks are read from <base_path>/<song>/<song>_RAW. Point this at
    # os.path.join(base_path, directory) instead if the wav files sit
    # directly in the song folder.
    raw_in_dir = os.path.join(base_path, directory, directory+'_RAW')

    make_dir(song_dir)
    make_dir(raw_out_dir)
    # Stems are always regenerated from scratch.
    if os.path.exists(stems_dir):
        shutil.rmtree(stems_dir)
    make_dir(stems_dir)
    make_dir(inst_dir)
    make_dir(musdb_dir)

    # Get all raw track paths
    all_tracks = [
        os.path.join(raw_in_dir, track)
        for track in os.listdir(raw_in_dir)
        if track.endswith('.wav')
    ]

    # Make one stem per instrument out of the raw tracks
    # TODO loudness normalization should be considered
    for inst, tracks_name in hierarchy_file["mix"]["track2inst"].items():
        make_stem(yaml_obj, inst_dir, raw_in_dir, track_df, tracks_name, inst, ID+f'_STEM_Inst_{inst}.wav')

    # Group the instrument stems written above into the MUSDB stems
    for stem, inst_name in hierarchy_file["mix"]["inst2stem"].items():
        make_stem(yaml_obj, musdb_dir, inst_dir, track_df, inst_name, stem, ID+f'_STEM_MUSDB_{stem}.wav')

    # create mix file
    make_mix(yaml_obj, musdb_dir, song_dir, yaml_obj['mix_filename'])

    # Copy the raw tracks into the output RAW folder (enabled by default)
    if move_raw:
        copy_raw_tracks(all_tracks, raw_out_dir)

    # Write YAML
    with open(os.path.join(song_dir, ID+'_METADATA.yaml'), 'w') as f:
        yaml.dump(yaml_obj, f, default_flow_style=False)
def init_medley_yaml():
    """Return a new, empty metadata template.

    Every field starts as an empty string except ``stems_Inst`` and
    ``stems_MUSDB``, which start as empty dicts. A fresh dict (with fresh
    nested dicts) is built on every call, so callers may mutate the result.
    """
    return {
        'album': '',
        'artist': '',
        'composer': '',
        'excerpt': '',
        'genre': '',
        'has_bleed': '',
        'instrumental': '',
        'mix_filename': '',
        'mix_integrated_loudness': '',
        'origin': '',
        'producer': '',
        'raw_dir': '',
        'stem_dir': '',
        'stems_Inst': {},
        'stems_MUSDB': {},
        'title': '',
        'version': '',
        'website': '',
        'csv_anno_path': '',
        'hieararchy_file_path': '',
    }
def make_mix(obj, stems_path, directory_path, file_name):
    """Sum every stem in ``stems_path`` into one loudness-normalized mix file.

    Args:
        obj: Metadata dict; its ``'mix_integrated_loudness'`` entry is set to
            the normalization type followed by the measured loudness.
        stems_path: Folder whose ``.wav`` files are summed.
        directory_path: Folder the mix is written to.
        file_name: File name of the mix inside ``directory_path``.

    Returns:
        None. When ``stems_path`` holds no ``.wav`` file a message is printed
        and nothing is written.
    """
    # Only audio files take part in the mix (stray files such as OS metadata
    # would make the reader fail); sorting keeps the summation order stable.
    tracks = sorted(
        os.path.join(stems_path, name)
        for name in os.listdir(stems_path)
        if name.endswith('.wav')
    )
    if not tracks:
        print('Empty track list sent for mix creation')
        return

    y, sr = sf.read(tracks[0], always_2d=True)
    if y.shape[1] != 2:
        # duplicate the channel so a mono stem can be summed with stereo ones
        y = np.repeat(y, 2, axis=1)
    for track in tracks[1:]:
        y_add = sf.read(track, always_2d=True)[0]
        if y_add.shape[1] != 2:
            y_add = np.repeat(y_add, 2, axis=1)
        # Stems may differ in length: zero-pad the shorter signal at its end
        # (axis 0 is time, axis 1 is channels) so the sum is well defined.
        missing = y.shape[0] - y_add.shape[0]
        if missing > 0:
            y_add = np.pad(y_add, ((0, missing), (0, 0)), 'constant')
        elif missing < 0:
            y = np.pad(y, ((0, -missing), (0, 0)), 'constant')
        y += y_add

    y, loudness, types = loudness_normalization(y, sr, 'mix', -25)
    obj['mix_integrated_loudness'] = types + f'{loudness:.4f}' + ' LUFS'
    path_to_write = os.path.join(directory_path, file_name)
    sf.write(path_to_write, y, sr)
    print("Successfully output mix.wav", path_to_write)
def make_stem(obj, stems_path, directory_path, track_df, inst_names, stem_inst_name, file_name):
    """Sum a group of tracks into one normalized stem and record it in ``obj``.

    Two modes, selected by ``stems_path``:

    * MUSDB mode (``'MUSDB'`` occurs in ``stems_path``): for each name in
      ``inst_names`` the files in ``directory_path`` whose part after
      ``'_Inst_'`` equals ``<name>.wav`` are used; the stem is recorded under
      ``obj['stems_MUSDB']``.
    * Instrument mode (otherwise): each name in ``inst_names`` is a column of
      ``track_df`` whose first value is a string-encoded list of wav file
      names located in ``directory_path``; the stem is recorded under
      ``obj['stems_Inst']``.

    Args:
        obj: Metadata dict holding the ``'stems_Inst'`` and ``'stems_MUSDB'``
            dicts; updated in place.
        stems_path: Folder the stem file is written to.
        directory_path: Folder the source audio is read from.
        track_df: Annotation row(s) of the song (only read in instrument mode).
        inst_names: Names that select the source tracks (see above).
        stem_inst_name: Instrument label stored for the stem; also selects
            the normalization strategy.
        file_name: File name of the stem inside ``stems_path``.

    Returns:
        None. When no source track is found ``obj`` is left unchanged and no
        file is written.
    """
    # NOTE(review): the mode is chosen by substring match, so an instrument
    # output path that merely contains 'MUSDB' somewhere would be treated as
    # a MUSDB stem -- confirm callers never pass such a path.
    is_musdb = 'MUSDB' in stems_path
    stems = obj['stems_MUSDB'] if is_musdb else obj['stems_Inst']

    # Stems are numbered S01, S02, ... in creation order.
    stem_key = f'S{len(stems) + 1:02d}'
    raw_entries = {}
    stem_entry = {
        'component': '',
        'filename': file_name,
        'instrument': stem_inst_name,
        'raw': raw_entries,
    }

    tracks = []
    if is_musdb:
        available = os.listdir(directory_path) if inst_names else []
        for position, name in enumerate(inst_names, start=1):
            wanted = name + '.wav'
            for wav in available:
                if wav.split('_Inst_')[-1] == wanted:
                    tracks.append(os.path.join(directory_path, wav))
                    # Raw entries keep the position of the instrument in
                    # inst_names, zero-padded to two digits (R01 ... R10 ...).
                    raw_entries[f'R{position:02d}'] = {'filename': wav, 'instrument': name}
    else:
        for name in inst_names:
            wav_lists = track_df[name].tolist()[0]
            if wav_lists == '[]':
                continue
            for wav in wav_lists.strip('[]').split(', '):
                # entries are quoted; names containing an apostrophe come
                # wrapped in double quotes instead of single ones
                wav = wav.strip("'\"")
                # A running counter keeps raw entries from different columns
                # from overwriting each other.
                raw_entries[f'R{len(raw_entries) + 1:02d}'] = {'filename': wav, 'instrument': name}
                tracks.append(os.path.join(directory_path, wav))

    if not tracks:
        # nothing to mix: do not register an empty stem
        return
    stems[stem_key] = stem_entry

    if stem_inst_name == 'backing_vocal':
        print("updating new backing vocal track")
        y, loudness, types = inst_spec_mix(tracks, stem_inst_name, threshold=-60)
        # NOTE(review): no sample rate is obtained from inst_spec_mix here,
        # so 44.1 kHz is assumed -- confirm against rule_based_mixing.
        sr = 44100
    else:
        y, sr = sf.read(tracks[0], always_2d=True)
        if y.shape[1] != 2:
            # duplicate the channel so mono and stereo tracks can be summed
            y = np.repeat(y, 2, axis=1)
        for track in tracks[1:]:
            y_add = sf.read(track, always_2d=True)[0]
            if y_add.shape[1] != 2:
                y_add = np.repeat(y_add, 2, axis=1)
            # Tracks may differ in length: zero-pad the shorter one at its
            # end (axis 0 is time) before summing.
            missing = y.shape[0] - y_add.shape[0]
            if missing > 0:
                y_add = np.pad(y_add, ((0, missing), (0, 0)), 'constant')
            elif missing < 0:
                y = np.pad(y, ((0, -missing), (0, 0)), 'constant')
            y += y_add
        y, loudness, types = loudness_normalization(y, sr, stem_inst_name, -25)

    stem_entry['loudness'] = types + f'{loudness:.4f}' + ' LUFS'
    path_to_write = os.path.join(stems_path, file_name)
    sf.write(path_to_write, y, sr)
def add_rem_tracks(obj, save_path, rem_tracks):
    """Create one stem per remaining track, named after the track's instrument.

    NOTE(review): ``make_stem`` as defined in this file takes seven
    positional parameters (obj, stems_path, directory_path, track_df,
    inst_names, stem_inst_name, file_name); the five-argument call below
    would raise ``TypeError``. This looks like a leftover from an earlier
    ``make_stem`` signature -- confirm whether the function is still needed.
    """
    for track_path in rem_tracks:
        base_name = os.path.basename(track_path)
        instrument = get_instrument_from_track_name(base_name)
        make_stem(obj, save_path, [track_path], instrument, base_name)
def get_instrument_from_track_name(track_name):
    """Derive the instrument name from a track file name such as ``01_Kick.wav``.

    The ``.wav`` extension is dropped, then every ``<digits>_<non-digits>``
    group is located and the non-digit parts are joined with ``'_'``.

    Args:
        track_name: Track file name, with or without the ``.wav`` extension.

    Returns:
        The instrument name, or ``''`` when no ``<digits>_<text>`` group is
        present.
    """
    # Remove the extension as a suffix. str.strip('.wav') would instead strip
    # any of the characters '.', 'w', 'a', 'v' from both ends and turn
    # '01_Viola.wav' into '01_Viol'.
    if track_name.endswith('.wav'):
        track_name = track_name[:-len('.wav')]
    regex = r"(\d*_)([a-zA-Z\D]*)"
    match = re.findall(regex, track_name)
    return '_'.join(name for _, name in match)
def find_all_instruments(base_path):
    """Collect the instrument names of all ``.wav`` tracks below ``base_path``.

    Each sub-folder of ``base_path`` is scanned (one level deep); the
    instrument name of a track is obtained by dropping the ``.wav`` extension
    and joining the non-digit parts of every ``<digits>_<non-digits>`` group
    with ``'_'``.

    Args:
        base_path: Folder that contains one sub-folder per song.

    Returns:
        A set with the distinct instrument names.
    """
    instruments = set()
    pattern = re.compile(r"(\d*_)([a-zA-Z\D]*)")
    for song in os.listdir(base_path):
        song_dir = os.path.join(base_path, song)
        if not os.path.isdir(song_dir):
            # stray files next to the song folders cannot be listed
            continue
        for track in os.listdir(song_dir):
            if not track.endswith(".wav"):
                continue
            # Drop the extension as a suffix; str.strip('.wav') would also
            # eat trailing 'a'/'v'/'w' characters of the instrument name.
            track_name = track[:-len(".wav")]
            instruments.add('_'.join(name for _, name in pattern.findall(track_name)))
    return instruments
def copy_raw_tracks(tracks, destination):
    """Copy each file listed in ``tracks`` into ``destination`` under its own base name."""
    for source_path in tracks:
        target_path = os.path.join(destination, os.path.basename(source_path))
        shutil.copyfile(source_path, target_path)
def make_dir(directory):
    """Create ``directory`` (and any missing parents); do nothing if it exists.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory.
        OSError: For any other failure to create the directory.
    """
    # exist_ok=True tolerates an existing directory only; a regular file in
    # the way is reported instead of being silently ignored.
    os.makedirs(directory, exist_ok=True)
def loudness_normalization(data, rate, stem_inst_name, target_loudness=-20.0):
    """Normalize ``data`` and report the integrated loudness of the result.

    Stems named ``'nontonal_percussion'`` or ``'drum_set'`` are peak
    normalized to -1 dB (``target_loudness`` is not used for them); every
    other stem is normalized to ``target_loudness``.

    Args:
        data: Audio samples to normalize.
        rate: Sample rate handed to the loudness meter.
        stem_inst_name: Stem label that selects the normalization strategy.
        target_loudness: Target passed to the loudness normalization.

    Returns:
        A tuple ``(normalized_audio, loudness, kind)`` where ``loudness`` is
        the integrated loudness measured on the normalized audio and ``kind``
        is ``'PEAK'`` or ``'INTEGRATED'``.
    """
    meter = pyln.Meter(rate)  # create BS.1770 meter
    if stem_inst_name in ['nontonal_percussion', 'drum_set']:
        # peak normalize audio to -1 dB
        result = pyln.normalize.peak(data, -1.0)
        kind = 'PEAK'
    else:
        # measure the loudness first, then bring it to the target
        measured = meter.integrated_loudness(data)
        result = pyln.normalize.loudness(data, measured, target_loudness)
        kind = 'INTEGRATED'
    return result, meter.integrated_loudness(result), kind
if __name__ == "__main__":
    # Usage: python ms21_generate_yaml.py <root_path> <out_path> <threads>
    #
    # <root_path> holds one folder per split, each containing one folder per
    # song; the same layout is produced below <out_path>.
    #
    # anno_file_path, hierarchy_path, base_path and save_path are left at
    # module level on purpose: gen_yaml reads them as globals.
    if len(sys.argv) < 4:
        sys.exit(f"usage: {sys.argv[0]} <root_path> <out_path> <threads>")
    root_path = sys.argv[1] # '/media/felix/dataset/ms21/train'
    out_path = sys.argv[2] # '/media/felix/dataset/ms21_DB
    threads = int(sys.argv[3])
    os.makedirs(out_path, exist_ok=True)
    anno_file_path = './mixing_secret_dataset_annotation_with_comment.csv'
    hierarchy_path = './hierarchy.json'

    for split in os.listdir(root_path):
        # if split not in ['train']: # modify to quick update the dataset if better automatic mixing algorithm is provided
        #     continue
        base_path = os.path.join(root_path, split)
        if not os.path.isdir(base_path):
            # stray files next to the split folders cannot be listed
            continue
        save_path = os.path.join(out_path, split)
        os.makedirs(save_path, exist_ok=True)

        # Only songs without a metadata file are (re)generated.
        residual_path_list = [
            song
            for song in os.listdir(base_path)
            if os.path.isdir(os.path.join(base_path, song))
            and not os.path.exists(os.path.join(save_path, song, song+'_METADATA.yaml'))
        ]
        print(residual_path_list)

        # pool.map blocks until every song of this split is done, so it is
        # safe to rebind base_path / save_path on the next iteration.
        with ThreadPool(threads) as pool:
            pool.map(gen_yaml, residual_path_list)
        # for i in residual_path_list: # not using multi-threads to debug
        #     gen_yaml(i)