-
Notifications
You must be signed in to change notification settings - Fork 0
/
dog_breed_VGG16.py
159 lines (120 loc) · 5.07 KB
/
dog_breed_VGG16.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
from google.colab import drive
drive.mount('/content/drive')
!pip install -q keras
import keras
import keras
from keras.layers import Input, Dense, Flatten, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
import os, fnmatch
from skimage import io, transform
import numpy as np
from tqdm import tqdm
import pandas as pd
import shutil
# Two VGG16 variants built on 224x224 RGB inputs:
#   * vgg16      - convolutional base only (include_top=False);
#   * vgg16_full - the complete network, from which the "fc1" and "fc2"
#                  dense layers are looked up below.
# NOTE: weights=None leaves both networks randomly initialised; use
# weights='imagenet' to load the pretrained weights instead.
_INPUT_SHAPE = (224, 224, 3)

vgg16 = keras.applications.vgg16.VGG16(
    weights=None,
    include_top=False,
    input_shape=_INPUT_SHAPE,
    input_tensor=None,
)
vgg16_full = keras.applications.vgg16.VGG16(
    weights=None,
    include_top=True,
    input_shape=_INPUT_SHAPE,
    input_tensor=None,
)

# The bare names are notebook-style echoes; they do nothing in a script.
fc1_layer = vgg16_full.get_layer(name="fc1")
fc1_layer
fc2_layer = vgg16_full.get_layer(name="fc2")
fc2_layer
# Load the label table and pull out its 'breed' and 'id' columns as
# stand-alone Series.
labels_csv = pd.read_csv('/content/drive/My Drive/labels.csv')
breeds, filenames = (pd.Series(labels_csv[column]) for column in ('breed', 'id'))
# Notebook-style preview of the first five breeds; the result is discarded
# when this runs as a script.
breeds.head(5)
# NOTE(review): `drive` is already imported and mounted with this same call
# near the top of the file; this repeat looks like a leftover notebook cell -
# confirm before removing.
from google.colab import drive
drive.mount('/content/drive')
# One-hot encode the breed of every row.
# np.unique returns the sorted distinct breed names and, with
# return_inverse=True, the position of each row's breed inside that array,
# which replaces a per-row np.where lookup over all breed names.
unique_breeds, breed_indices = np.unique(breeds, return_inverse=True)
n_breeds = np.max(breed_indices) + 1
# Row k of the identity matrix is the one-hot vector for class k.
labels = np.eye(n_breeds)[breed_indices]
print(unique_breeds)
# Copy every image into a "<split>/<breed>/<id>.jpg" directory layout and
# record the destination paths per split, in label-table row order.
filenames_train = []
filenames_validate = []
for i, image_id in enumerate(tqdm(filenames)):
    # labels[i] is a one-hot row, so argmax recovers the breed index.
    label = unique_breeds[np.argmax(labels[i])]
    filename = '{}.jpg'.format(image_id)
    # The first 8000 rows form the training split; the rest is validation.
    if i < 8000:
        split, split_filenames = 'train', filenames_train
    else:
        split, split_filenames = 'validate', filenames_validate
    new_dir = '/content/drive/My Drive/sorted/{}/{}/'.format(split, label)
    split_filenames.append(new_dir + filename)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(new_dir, exist_ok=True)
    shutil.copy("/content/drive/My Drive/train/{}".format(filename), new_dir + filename)
# The image generator fetches images in alphabetical order, so the filename
# and label arrays are put into that same order to stay aligned with it.
indices_train = np.argsort(filenames_train)
indices_val = np.argsort(filenames_validate)
sorted_filenames_train = np.take(filenames_train, indices_train)
sorted_filenames_validate = np.take(filenames_validate, indices_val)
# 8000 is the train/validation boundary used when the files were copied.
sorted_labels_train = np.take(np.array(labels)[:8000], indices_train, axis=0)
sorted_labels_validate = np.take(np.array(labels)[8000:], indices_val, axis=0)

# Sanity check: the breed decoded from label 50 ...
print(unique_breeds[np.where(sorted_labels_train[50] == 1.)])
# ... should match the breed directory in the path of file 50.
print(sorted_filenames_train[50])
def preprocess(img):
    """Apply the VGG16 input preprocessing to a single image.

    A leading batch axis is added before calling ``preprocess_input`` and
    stripped again afterwards, so the caller gets back one image.
    """
    batch = np.expand_dims(img, axis=0)
    return preprocess_input(batch)[0]


# Both generators run every image through `preprocess`. The function must be
# passed with a keyword `=`; writing `==` compares against an undefined name
# and raises NameError.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess)
batch_size = 64

# Options shared by both directory iterators. shuffle=False keeps a fixed
# image order so the batches can be matched against the sorted label arrays.
_flow_options = dict(batch_size=batch_size, target_size=(224, 224), shuffle=False)

train_gen = train_datagen.flow_from_directory(
    "/content/drive/My Drive/sorted/train", **_flow_options
)
val_gen = val_datagen.flow_from_directory(
    "/content/drive/My Drive/sorted/validate", **_flow_options
)
# Generate bottleneck features by running the images through the
# convolutional VGG16 base.
# Only one step (a single batch) is executed per split because of the limited
# running time; raise `steps` to extract features for more of the data.
x_train = vgg16.predict_generator(train_gen, steps=1, verbose=1)
x_val = vgg16.predict_generator(val_gen, steps=1, verbose=1)

# Keep exactly as many labels as feature rows were produced.
y_train = sorted_labels_train[:len(x_train)]
y_val = sorted_labels_validate[:len(x_val)]
# Classifier head that takes the (7, 7, 512) bottleneck features as input.
inputs = Input(shape=(7, 7, 512))

# Turn off training for vgg16 and for the borrowed fc1 layer.
for layer in vgg16.layers:
    layer.trainable = False
fc1_layer.trainable = False

# Quite high dropout is needed to make the model overfit less.
x = inputs
for head_layer in (
    Flatten(),
    fc1_layer,
    BatchNormalization(),
    Dropout(0.8),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.8),
    Dense(120, activation='softmax'),
):
    x = head_layer(x)

model = Model(inputs=inputs, outputs=x)
model.summary()
# Train the head on the bottleneck features with Adam and categorical
# cross-entropy, reporting accuracy and validating after every epoch.
model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy'],
)
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=30,
    batch_size=128,
    verbose=1,
)