-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
224 lines (178 loc) · 9.16 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import scipy
import time
def load_image(filepath: str) -> np.array:
try:
img=Image.open(filepath)
print("Image loaded succesfully")
return np.moveaxis(np.array(img), 0, 1)
except Exception as e:
print("Error loading image: {e}")
return None
def view_image(image: np.array, title: str = None) -> None:
print("Shape of the image being displayed:",image.shape)
plt.imshow(image)
if title is not None:
plt.title(title)
plt.show()
def create_gaussian_kernel(size:int, stddev:float) -> np.array:
'''
This function generates a 1D Gaussian distribution using numpy "linspace".
'''
# Generate `size` points between -stddev and stddev for a symmetric Gaussian distribution
x = np.linspace(-size//2, size//2, size)
# Compute the 1D Gaussian distribution using the PDF of the normal distribution
gaussian_1d = scipy.stats.norm.pdf(x, loc=0, scale=stddev)
# Normalize to make the sum equal to 1
gaussian_1d /= gaussian_1d.sum()
# Create the 2D Gaussian kernel by taking the outer product of the 1D Gaussian with itself
gaussian_kernel_2d = np.outer(gaussian_1d, gaussian_1d)
return gaussian_kernel_2d
def convolve2d(x: np.array, h: np.array) -> np.array:
"""
x: a 2d or 3d np.array representing your image
h: a 2d np.array representing your kernel
"""
start = time.time()
print("Computing convolution... may take a few minutes...")
print("At this point, just quit the program... it took 10 minutes on my machine, its probably going to take just as long on yours...")
print("Move on to the FFT Convolution")
if x.ndim == 3:
channels = [scipy.signal.convolve2d(x[:, :, i], h, mode="same", boundary="wrap") for i in range(x.shape[-1])]
y = np.stack(channels, axis=-1)
else:
assert x.ndim == 2
y = scipy.signal.convolve2d(x, h, mode="same", boundary="wrap")
stop = time.time()
print(f"Convolution took {stop - start} seconds")
return y.astype(np.int32)
def fft_conv2d(x: np.array, h:np.array) -> np.array:
"""
The trick of computing convolutions quickly: Fourier transform your signal and kernel
to the frequency domain, and perform a multiplication instead!
x: np.array[H, W, C]: A 3d tensor representing an image
h: np.array[K, K]: a kernel you want to convolve with the image
"""
start1 = time.time()
# For multiplication, we want our signal and kernel to have the same spatial dimensions.
# For the kernel, simply zero-pad it until it reaches the desired shape
h_padded = np.zeros_like(x[:, :, 0]).astype(h.dtype)
startY = (x.shape[0] - h.shape[0])//2
startX = (x.shape[1] - h.shape[1])//2
h_padded[startY:startY + h.shape[0], startX:startX + h.shape[1]] += h
# 2D FFT. We first swap the axis of X, so that it goes from [H, W, C] to [C, H, W].
# This is in accordance to the documentation for np.fft.fft2
X = np.fft.fft2(np.moveaxis(x, -1, 0))
H = np.fft.fft2(h_padded)
stop1 = time.time()
# visualizations are important!
view_image(np.moveaxis(np.real(X), 0, -1), "FT of image")
view_image(np.real(H), "FT of kernel")
start2 = time.time()
# Convolution theorem: FT{response} = FT{signal} x TRANSFER FUNCTION
Y = X * H[None, :, :] # This is still in the form [C, H, W]
# Inverse transform
y = np.fft.ifft2(Y)
# This part will shift all the frequencies back to the right place, since discrete fourier transforms
# are not centered around the origin. Uncommenting this equation will result in shifted colours in the
# resulting image
y = np.stack(
[np.fft.fftshift(y[i]) for i in range(y.shape[0])],
axis=0)
y = np.moveaxis(y, 0, -1)
stop2 = time.time()
print(f"FFT Convolution took {stop2 - start2 + stop1 - start1} seconds")
return np.real(y).astype(np.int32)
def olivia_fft_conv2d(x: np.array, h:np.array) -> np.array:
"""
Now its your turn! Be careful of the axes, as we will move the channel axis around and back...
The trick of computing convolutions quickly: Fourier transform your signal and kernel
to the frequency domain, and perform a multiplication instead!
x: np.array[H, W, C]: A 3d tensor representing an image
h: np.array[K, K]: a kernel you want to convolve with the image
"""
# For multiplication, we want our signal and kernel to have the same spatial dimensions.
# For the kernel, simply zero-pad it until it reaches the desired shape
###### The discrete Fourier transform is a circular convolution; thus, values wrap around and may create artifacts on the edge of our image unless we zero pad it.
x_shape= np.shape(x)
h_shape=np.shape(h)
'''
Below was my initial (naive sad onerous) approach.
#while x-dimension of arrays h and x are different, alternate padding on right and left of h. Check in between R and L if sizes are equal.
while x_shape[0] != h_shape[0]:
h= np.pad(h, ((1,0),(0,0)), 'constant', constant_values=0) #pad the left of the array
h_shape=np.shape(h)
if x_shape[0] == h_shape[0]:
break
h=np.pad(h, ((0,1),(0,0)), 'constant', constant_values=0) #pad the right of the array
h_shape=np.shape(h)
#same for y-dimension
while x_shape[1] != h_shape[1]:
h=np.pad(h, ((0,0),(1,0)), 'constant', constant_values=0) #pad the left of the array
h_shape=np.shape(h)
if x_shape[1] == h_shape[1]:
break
h=np.pad(h, ((0,0),(0,1)), 'constant', constant_values=0) #pad the right of the array
h_shape=np.shape(h)
'''
#Separately calculate how many rows/cols are needed to pad onto h
pad_x = x_shape[0] - h_shape[0]
pad_y = x_shape[1] - h_shape[1]
pad_right = pad_x//2
pad_left = pad_x - pad_right
pad_up = pad_y//2
pad_down = pad_y - pad_up
h_padded=np.pad(h, ((pad_right, pad_left), (pad_up, pad_down)), 'constant', constant_values=0)
# 2D FFT. We first swap the axis of X, so that it goes from [H, W, C] to [C, H, W].
# This is in accordance to the documentation for np.fft.fft2
###### We swap the axis because np.fft.fft2 automatically performs the FT on the last two dimensions.
x_swapped_axis = np.moveaxis(x, -1, 0)
X = np.fft.fft2(x_swapped_axis) # dimensions CxHxW, because Channels unimportant rn
H = np.fft.fft2(h_padded) # h_padded is already dimensions HxW
# visualizations are important!
###### When viewing X and H, be cognizant of two things:
############ 1. We only want to view the real portion (because thats the amplitude of our frequency).
############ 2. We need to swap X axis again to H,W,C because it's common practice for Channels (color) to be the third dimension.
img_X = np.moveaxis(np.real(X), 0, -1)
img_H = np.real(H)
view_image(img_X, "Olivia's FT of image!")
view_image(img_H, "Olivia's FT of kernel!")
# Convolution theorem: FT{response} = FT{signal} x TRANSFER FUNCTION
####### We want H to be the same dimensions as X so that we can do element-wise multiplication.
####### Note that this is NOT matrix multiplication. Numpy has different syntax for that.
Y = X * H[None,:,:] #
# Inverse transform!
y= np.fft.ifft2(Y)
# This part will shift all the frequencies back to the right place, since discrete fourier transforms are not centered around the origin. Uncommenting this equation will result in shifted colours in the resulting image
###### Remember that it is still C, H, W. We want to shift the frequencies back to normal so that our colors appear normally.
###### Remember that the first channel is for colors? Iterate through the range of the colors.
## QUESTION: in this case, its 3, right? (RGB?)
color_channel_dimension = y.shape[0]
arrays=[np.fft.fftshift(y[i]) for i in range(color_channel_dimension)] # y[3, H, W]: y[i, :, :] -> y[H, W]
y=np.stack(arrays, axis=0) #axis=0 refers to the first dimension. i elements in y.shape[0] will lead to resulting y array having dimension (i,H,W)
# Dont forget that y is still arranged as C H W! Let's put that color channel back where it belongs.
y=np.moveaxis(y, 0,-1)
## Yay! Return your array, the convolved image! Remember: for viewing the image, we only care about real elements.
## Note: integers represent colors, etc. If we don't include the conversion to int32, we see a blank white picture!
return np.real(y).astype(np.int32)
def main():
filepath = "temp_test_photo.JPG"
img=load_image(filepath)
view_image(img, "original image")
print("gaussian kernel visualization")
h_gaussian = create_gaussian_kernel(size=100, stddev=30)
view_image(h_gaussian, "gaussian kernel")
# Comment this out if you don't want to wait 10 minutes....
#result = convolve2d(img, h_gaussian)
#view_image(result, "convolution of kernel with image")
'''
result = fft_conv2d(img, h_gaussian)
view_image(result)
'''
################ Olivia Testing Zone!
result= olivia_fft_conv2d(img, h_gaussian)
view_image(result, "Olivia's final image")
if __name__ == "__main__":
main()