-
Notifications
You must be signed in to change notification settings - Fork 0
/
spectral_decomposition.py
158 lines (145 loc) · 7.25 KB
/
spectral_decomposition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import numpy as np
import librosa
import matplotlib.pyplot as plt
from skimage.segmentation import mark_boundaries
class SpectralDecomposition(object):
    """Decompose a cough audio array into equally sized spectral components.

    The audio is converted to a mel spectrogram with ``N_MELS`` bands. The mel
    axis is cut into ``num_components`` contiguous bands and every component is
    a copy of the spectrogram that is zero everywhere except inside its band.
    """

    # Number of mel bands of the spectrogram that gets decomposed.
    N_MELS = 128

    def __init__(self, audio, sample_rate, num_components):
        """
        Init function
        :param audio: np.array((n,)), audio to be decomposed
        :param sample_rate: int, sample rate of audio
        :param num_components: int, number of components to be generated
        :raises ValueError: if num_components is smaller than 1
        """
        if num_components < 1:
            raise ValueError('num_components must be at least 1')
        self.num_components = num_components
        self.audio = audio
        self.sample_rate = sample_rate
        self.decomposition_type = 'spectral'
        # components are stored in 3d numpy array of shape
        # (num_components, N_MELS, length_spectrogram)
        self.initialize_components()

    def get_number_components(self):
        """
        :return: int, number of components generated during the decomposition
        """
        return self.num_components

    def _component_bounds(self, index):
        """
        compute the mel band covered by one component
        :param index: int, index of the component
        :return: (start, stop) half-open row range on the mel axis, clipped to N_MELS
        """
        # Ceiling division: every band has the same height except the last
        # non-empty one, which takes whatever rows are left. For large
        # component counts the trailing components can be empty.
        len_component = -(-self.N_MELS // self.num_components)
        start = min(index * len_component, self.N_MELS)
        stop = min(start + len_component, self.N_MELS)
        return start, stop

    def _split_spectrogram(self, spectrogram):
        """
        cut a spectrogram into one zero-padded copy per component
        :param spectrogram: 2d array with the mel bands on the first axis
        :return: 3d array of shape (num_components,) + spectrogram.shape
        """
        components = np.zeros((self.num_components,) + np.shape(spectrogram))
        for i in range(self.num_components):
            start, stop = self._component_bounds(i)
            components[i, start:stop, :] = spectrogram[start:stop, :]
        return components

    def initialize_components(self):
        """
        calculates the mel spectrogram of the audio and divides it into components along the spectral axis
        each components[i, :, :] holds the spectrogram rows of band i and zeros elsewhere
        :return: nothing, stores components in self.components
        """
        spectrogram = librosa.feature.melspectrogram(y=self.audio, sr=self.sample_rate, n_mels=self.N_MELS)
        self.components = self._split_spectrogram(spectrogram)

    def get_components_mask(self, mask):
        """
        return audio reconstructed from the components selected by a mask
        :param mask: 1D array-like of false and true, one entry per component
        :return: 1d audio array inverted from the sum of the selected components
        :raises ValueError: if the mask does not have num_components entries
        """
        if len(mask) != self.num_components:
            raise ValueError('Error: mask has incorrect length')
        # Cast explicitly: an integer 0/1 mask would otherwise be interpreted
        # as a list of component indices instead of per-component flags.
        mask = np.asarray(mask, dtype=bool)
        combined_spec = np.sum(self.components[mask, :, :], axis=0)
        reconstructed_audio = librosa.feature.inverse.mel_to_audio(combined_spec, sr=self.sample_rate)
        return reconstructed_audio

    def return_components(self, indices):
        """
        return audio array for given component indices, all other components set to 0
        :param indices: list of indices for which to return the original audio components
        :return: 1d np audio array
        """
        # make mask setting true for indices
        mask = np.zeros((self.num_components,), dtype=bool)
        mask[indices] = True
        return self.get_components_mask(mask)

    def return_spectrogram_indices(self, indices):
        """
        returns the spectrogram with only the components indices specified
        :param indices: list of indices for which to return the original spectral components
        :return: 2d array, combined spectrogram with selected components only
        """
        mask = np.zeros((self.num_components,), dtype=bool)
        mask[indices] = True
        return np.sum(self.components[mask, :, :], axis=0)

    def return_mask_boundaries(self, positive_indices, negative_indices):
        """
        calculates a mask for highlighting selected components in an image
        :param positive_indices: indices of components with positive weights
        :param negative_indices: indices of components with negative weights
        :return: 2d array, set to 1 for components with positive weights and to -1 for negative weights
        """
        mask = np.zeros(np.shape(self.components[0, :, :]), dtype=np.byte)
        for i in range(self.num_components):
            if i in positive_indices:
                value = 1
            elif i in negative_indices:
                value = -1
            else:
                continue
            start, stop = self._component_bounds(i)
            # Leave a one-pixel frame of zeros around every band so that
            # neighbouring bands stay visually separated.
            mask[start + 1:stop - 1, 1:-1] = value
        return mask

    def return_weighted_components(self, used_features, weights):
        """
        return audio with spectral components weighted according to their absolute importance
        :param used_features: array of indices of features to include
        :param weights: array of their corresponding weights
        :return: 1d array with weighted audio
        """
        # normalize weights so that their absolute values sum to 1
        weights = np.asarray(weights, dtype=float)
        sum_weights = np.sum(np.abs(weights))
        if sum_weights > 0:
            # skip the division for all-zero weights, it would only produce NaNs
            weights = weights / sum_weights
        mask_weights = np.zeros((self.num_components,))
        for index, feature in enumerate(used_features):
            mask_weights[feature] = weights[index]
        # make weighted sum
        weighted_spectrogram = np.zeros(np.shape(self.components[0, :, :]))
        for comp in range(self.num_components):
            if mask_weights[comp] != 0:
                weighted_spectrogram += abs(mask_weights[comp]) * self.components[comp, :, :]
        reconstructed_audio = librosa.feature.inverse.mel_to_audio(weighted_spectrogram, sr=self.sample_rate)
        return reconstructed_audio

    def visualize_decomp(self, save_path=None):
        """
        visualizes the generated spectral decomposition
        :param save_path: if not None, path to save the generated figure
        """
        spectrogram_indices = range(self.num_components)
        # mark every component as positive so that all bands get outlined
        mask = self.return_mask_boundaries(spectrogram_indices, [])
        spectrogram = self.return_spectrogram_indices(spectrogram_indices)
        spec_db = librosa.power_to_db(spectrogram, ref=np.max)
        marked = mark_boundaries(spec_db, mask)
        plt.imshow(marked[:, :, 2], origin="lower", cmap=plt.get_cmap("magma"))
        plt.colorbar(format='%+2.0f dB')
        plt.xlabel("Time")
        plt.ylabel("Frequency")
        ax = plt.gca()
        ax.xaxis.set_ticks([])
        ax.yaxis.set_ticks([])
        plt.title("Spectral Decomposition")
        if save_path is not None:
            plt.savefig(save_path)
        plt.show()
        plt.close()