#!/usr/bin/python
# Author: Siddhartha Gairola (t-sigai at microsoft dot com)
import os
import random
import numpy as np
import cv2
import torch
from torch.utils.data import Dataset
import librosa
from tqdm import tqdm
from utils import *
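
# get_annotations, get_sound_samples, split_and_pad, gen_augmented, rollAudio,
# create_mel_raw and save_images used below all come from utils.
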
class image_loader(Dataset):
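    """Dataset of respiratory-sound breathing cycles rendered as mel-spectrogram images.

    Each item is a (spectrogram image, label) pair with labels
    0 = normal, 1 = crackle, 2 = wheeze, 3 = both (crackle + wheeze).
    Patients are split into train/test by fold (folds_file, test_fold), and
    samples can optionally be restricted to one recording device (stetho_id).
    """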
def __init__(self, data_dir, folds_file, test_fold, train_flag, params_json, input_transform=None, stetho_id=-1, aug_scale=None):
# getting device-wise information
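        # file names are assumed to look like <patient>_..._<device>.<ext>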
self.file_to_device = {}
device_to_id = {}
device_id = 0
files = os.listdir(data_dir)
device_patient_list = []
pats = []
for f in files:
device = f.strip().split('_')[-1].split('.')[0]
if device not in device_to_id:
device_to_id[device] = device_id
device_id += 1
device_patient_list.append([])
self.file_to_device[f.strip().split('.')[0]] = device_to_id[device]
pat = f.strip().split('_')[0]
if pat not in device_patient_list[device_to_id[device]]:
device_patient_list[device_to_id[device]].append(pat)
if pat not in pats:
pats.append(pat)
print("DEVICE DICT", device_to_id)
for idx in range(device_id):
print("Device", idx, len(device_patient_list[idx]))
        # build the patient dict for the current fold based on the train flag
        with open(folds_file) as f:
            all_patients = f.read().splitlines()
patient_dict = {}
for line in all_patients:
idx, fold = line.strip().split(' ')
if train_flag and int(fold) != test_fold:
patient_dict[idx] = fold
            elif not train_flag and int(fold) == test_fold:
patient_dict[idx] = fold
        # extract the audio filenames and the data for each breathing cycle and its label
print("Getting filenames ...")
filenames, rec_annotations_dict = get_annotations(data_dir)
if stetho_id >= 0:
self.filenames = [s for s in filenames if s.split('_')[0] in patient_dict and self.file_to_device[s] == stetho_id]
else:
self.filenames = [s for s in filenames if s.split('_')[0] in patient_dict]
self.audio_data = [] # each sample is a tuple with id_0: audio_data, id_1: label, id_2: file_name, id_3: cycle id, id_4: aug id, id_5: split id
self.labels = []
self.train_flag = train_flag
self.data_dir = data_dir
self.input_transform = input_transform
# parameters for spectrograms
        self.sample_rate = 4000    # Hz
        self.desired_length = 8    # seconds per (padded) cycle
        self.n_mels = 64
        self.nfft = 256
        self.hop = self.nfft // 2  # 50% window overlap
        self.f_max = 2000          # Hz, upper bound of the mel filter bank
        # set dump_images = True to save the generated spectrograms for debugging
        self.dump_images = False
self.filenames_with_labels = []
# get individual breathing cycles from each audio file
print("Exracting Individual Cycles")
self.cycle_list = []
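        # classwise_cycle_list buckets: 0 = normal, 1 = crackle, 2 = wheeze, 3 = both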
self.classwise_cycle_list = [[], [], [], []]
        for file_name in tqdm(self.filenames):
data = get_sound_samples(rec_annotations_dict[file_name], file_name, data_dir, self.sample_rate)
cycles_with_labels = [(d[0], d[3], file_name, cycle_idx, 0) for cycle_idx, d in enumerate(data[1:])]
self.cycle_list.extend(cycles_with_labels)
            for d in cycles_with_labels:
self.filenames_with_labels.append(file_name+'_'+str(d[3])+'_'+str(d[1]))
self.classwise_cycle_list[d[1]].append(d)
# concatenation based augmentation scheme
if train_flag and aug_scale:
self.new_augment(scale=aug_scale)
# split and pad each cycle to the desired length
        for sample in self.cycle_list:
output = split_and_pad(sample, self.desired_length, self.sample_rate, types=1)
self.audio_data.extend(output)
self.device_wise = []
for idx in range(device_id):
self.device_wise.append([])
self.class_probs = np.zeros(4)
self.identifiers = []
        for sample in self.audio_data:
self.class_probs[sample[1]] += 1.0
self.labels.append(sample[1])
self.identifiers.append(sample[2]+'_'+str(sample[3])+'_'+str(sample[1]))
self.device_wise[self.file_to_device[sample[2]]].append(sample)
if self.train_flag:
print("TRAIN DETAILS")
else:
print("TEST DETAILS")
print("CLASSWISE SAMPLE COUNTS:", self.class_probs)
print("Device to ID", device_to_id)
for idx in range(device_id):
print("DEVICE ID", idx, "size", len(self.device_wise[idx]))
self.class_probs = self.class_probs / sum(self.class_probs)
print("CLASSWISE PROBS", self.class_probs)
print("LEN AUDIO DATA", len(self.audio_data))
def new_augment(self, scale=1):
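        """Concatenation-based augmentation: create new cycles by concatenating
        pairs of existing cycles, until every class has roughly
        scale * (number of normal cycles) samples."""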
# augment normal
aug_nos = scale*len(self.classwise_cycle_list[0]) - len(self.classwise_cycle_list[0])
for idx in range(aug_nos):
# normal_i + normal_j
i = random.randint(0, len(self.classwise_cycle_list[0])-1)
j = random.randint(0, len(self.classwise_cycle_list[0])-1)
normal_i = self.classwise_cycle_list[0][i]
normal_j = self.classwise_cycle_list[0][j]
new_sample = np.concatenate([normal_i[0], normal_j[0]])
self.cycle_list.append((new_sample, 0, normal_i[2]+'-'+normal_j[2],
idx, 0))
self.filenames_with_labels.append(normal_i[2]+'-'+normal_j[2]+'_'+str(idx)+'_0')
# augment crackle
aug_nos = scale*len(self.classwise_cycle_list[0]) - len(self.classwise_cycle_list[1])
for idx in range(aug_nos):
aug_prob = random.random()
if aug_prob < 0.6:
# crackle_i + crackle_j
i = random.randint(0, len(self.classwise_cycle_list[1])-1)
j = random.randint(0, len(self.classwise_cycle_list[1])-1)
sample_i = self.classwise_cycle_list[1][i]
sample_j = self.classwise_cycle_list[1][j]
            elif 0.6 <= aug_prob < 0.8:
# crackle_i + normal_j
i = random.randint(0, len(self.classwise_cycle_list[1])-1)
j = random.randint(0, len(self.classwise_cycle_list[0])-1)
sample_i = self.classwise_cycle_list[1][i]
sample_j = self.classwise_cycle_list[0][j]
else:
# normal_i + crackle_j
i = random.randint(0, len(self.classwise_cycle_list[0])-1)
j = random.randint(0, len(self.classwise_cycle_list[1])-1)
sample_i = self.classwise_cycle_list[0][i]
sample_j = self.classwise_cycle_list[1][j]
new_sample = np.concatenate([sample_i[0], sample_j[0]])
self.cycle_list.append((new_sample, 1, sample_i[2]+'-'+sample_j[2],
idx, 0))
self.filenames_with_labels.append(sample_i[2]+'-'+sample_j[2]+'_'+str(idx)+'_1')
# augment wheeze
aug_nos = scale*len(self.classwise_cycle_list[0]) - len(self.classwise_cycle_list[2])
for idx in range(aug_nos):
aug_prob = random.random()
if aug_prob < 0.6:
# wheeze_i + wheeze_j
i = random.randint(0, len(self.classwise_cycle_list[2])-1)
j = random.randint(0, len(self.classwise_cycle_list[2])-1)
sample_i = self.classwise_cycle_list[2][i]
sample_j = self.classwise_cycle_list[2][j]
            elif 0.6 <= aug_prob < 0.8:
# wheeze_i + normal_j
i = random.randint(0, len(self.classwise_cycle_list[2])-1)
j = random.randint(0, len(self.classwise_cycle_list[0])-1)
sample_i = self.classwise_cycle_list[2][i]
sample_j = self.classwise_cycle_list[0][j]
else:
# normal_i + wheeze_j
i = random.randint(0, len(self.classwise_cycle_list[0])-1)
j = random.randint(0, len(self.classwise_cycle_list[2])-1)
sample_i = self.classwise_cycle_list[0][i]
sample_j = self.classwise_cycle_list[2][j]
new_sample = np.concatenate([sample_i[0], sample_j[0]])
self.cycle_list.append((new_sample, 2, sample_i[2]+'-'+sample_j[2],
idx, 0))
self.filenames_with_labels.append(sample_i[2]+'-'+sample_j[2]+'_'+str(idx)+'_2')
# augment both
aug_nos = scale*len(self.classwise_cycle_list[0]) - len(self.classwise_cycle_list[3])
for idx in range(aug_nos):
aug_prob = random.random()
if aug_prob < 0.5:
# both_i + both_j
i = random.randint(0, len(self.classwise_cycle_list[3])-1)
j = random.randint(0, len(self.classwise_cycle_list[3])-1)
sample_i = self.classwise_cycle_list[3][i]
sample_j = self.classwise_cycle_list[3][j]
            elif 0.5 <= aug_prob < 0.7:
# crackle_i + wheeze_j
i = random.randint(0, len(self.classwise_cycle_list[1])-1)
j = random.randint(0, len(self.classwise_cycle_list[2])-1)
sample_i = self.classwise_cycle_list[1][i]
sample_j = self.classwise_cycle_list[2][j]
            elif 0.7 <= aug_prob < 0.8:
# wheeze_i + crackle_j
i = random.randint(0, len(self.classwise_cycle_list[2])-1)
j = random.randint(0, len(self.classwise_cycle_list[1])-1)
sample_i = self.classwise_cycle_list[2][i]
sample_j = self.classwise_cycle_list[1][j]
            elif 0.8 <= aug_prob < 0.9:
# both_i + normal_j
i = random.randint(0, len(self.classwise_cycle_list[3])-1)
j = random.randint(0, len(self.classwise_cycle_list[0])-1)
sample_i = self.classwise_cycle_list[3][i]
sample_j = self.classwise_cycle_list[0][j]
else:
# normal_i + both_j
i = random.randint(0, len(self.classwise_cycle_list[0])-1)
j = random.randint(0, len(self.classwise_cycle_list[3])-1)
sample_i = self.classwise_cycle_list[0][i]
sample_j = self.classwise_cycle_list[3][j]
new_sample = np.concatenate([sample_i[0], sample_j[0]])
self.cycle_list.append((new_sample, 3, sample_i[2]+'-'+sample_j[2],
idx, 0))
self.filenames_with_labels.append(sample_i[2]+'-'+sample_j[2]+'_'+str(idx)+'_3')
def __getitem__(self, index):
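        """Return one (spectrogram image, label) pair for the given index.

        During training the waveform is randomly augmented, re-padded and
        rolled; it is then converted to a mel-spectrogram image, mostly-blank
        rows at the top are clipped, and input_transform (if any) is applied.
        """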
audio = self.audio_data[index][0]
aug_prob = random.random()
if self.train_flag and aug_prob > 0.5:
# apply augmentation to audio
audio = gen_augmented(audio, self.sample_rate)
            # pad in case augmentation left the sample shorter than the desired length
            audio = split_and_pad([audio, 0, 0, 0, 0], self.desired_length, self.sample_rate, types=1)[0][0]
# roll audio sample
roll_prob = random.random()
if self.train_flag and roll_prob > 0.5:
audio = rollAudio(audio)
# convert audio signal to spectrogram
        # spectrograms are resized to 3x their original size
audio_image = cv2.cvtColor(create_mel_raw(audio, self.sample_rate, f_max=self.f_max,
n_mels=self.n_mels, nfft=self.nfft, hop=self.hop, resz=3), cv2.COLOR_BGR2RGB)
# blank region clipping
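        # scan the (1x-scale) grayscale spectrogram from the top and find the
        # first row that is less than 80% black; everything above it in the
        # 3x colour image is cropped away (hence the *3 row scaling below)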
audio_raw_gray = cv2.cvtColor(create_mel_raw(audio, self.sample_rate, f_max=self.f_max,
n_mels=self.n_mels, nfft=self.nfft, hop=self.hop), cv2.COLOR_BGR2GRAY)
audio_raw_gray[audio_raw_gray < 10] = 0
for row in range(audio_raw_gray.shape[0]):
black_percent = len(np.where(audio_raw_gray[row,:]==0)[0])/len(audio_raw_gray[row,:])
if black_percent < 0.80:
break
if (row+1)*3 < audio_image.shape[0]:
audio_image = audio_image[(row+1)*3:, :, :]
audio_image = cv2.resize(audio_image, (audio_image.shape[1], self.n_mels*3), interpolation=cv2.INTER_LINEAR)
if self.dump_images:
save_images((audio_image, self.audio_data[index][2], self.audio_data[index][3],
self.audio_data[index][5], self.audio_data[index][1]), self.train_flag)
# label
label = self.audio_data[index][1]
# apply image transform
if self.input_transform is not None:
audio_image = self.input_transform(audio_image)
return audio_image, label
def __len__(self):
return len(self.audio_data)
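

# ---------------------------------------------------------------------------
# Usage sketch (not from the original file): a minimal, hypothetical example
# of driving this dataset with a PyTorch DataLoader. The data directory,
# folds file, and fold index below are placeholders.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from torch.utils.data import DataLoader

    # hypothetical paths: point these at your recordings and fold definitions
    dataset = image_loader(data_dir="data/", folds_file="folds.txt",
                           test_fold=4, train_flag=True, params_json=None)
    loader = DataLoader(dataset, batch_size=16, shuffle=True)

    # each batch stacks (n_mels*3)-tall RGB spectrogram images and int labels
    images, labels = next(iter(loader))
    print(images.shape, labels.shape)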