-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathvisual_bow.py
218 lines (163 loc) · 7.47 KB
/
visual_bow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# Ian London 2016
# tools to run a Visual Bag of Words classifier on any images
import cv2
import numpy as np
import glob
import os
# Report the OpenCV build at import time: this module calls
# cv2.xfeatures2d.SIFT_create further down.
# A single pre-formatted argument prints the same text on Python 2 and Python 3.
print('OpenCV VERSION (should be 3.1.0 or later, with nonfree modules installed!): %s' % (cv2.__version__,))
def read_image(path):
    """
    Load an image from disk with cv2.imread.

    Parameters:
    -----------
    path : str
        Path of the image file to read.

    Returns:
    --------
    img : whatever cv2.imread returned for `path` (never None)

    Raises:
    -------
    IOError
        If cv2.imread returned None for `path`.
    """
    img = cv2.imread(path)
    if img is None:
        # A None result is treated as "could not read"; name the offending
        # path in the message so the caller can see which file failed.
        raise IOError("Unable to open '%s'. Are you sure it's a valid image path?" % (path,))
    return img
def neg_img_cal101(positive_folder, cal101_root='101_ObjectCategories', image_suffix='*.jpg'):
    """
    Return the paths of every image under cal101_root whose category folder
    is not positive_folder.

    Parameters:
    -----------
    positive_folder : str
        Name of the category folder to exclude (eg 'ant').
    cal101_root : str
        Directory holding one sub-folder per category.
    image_suffix : str
        Glob pattern matched against file names inside each category folder.

    Returns:
    --------
    list of str
        Paths matched by cal101_root/*/image_suffix, minus those that sit
        directly inside the positive folder.
    """
    # Compare the image's own parent directory by exact name. A substring test
    # against the whole path would also discard unrelated categories whose name
    # merely contains positive_folder (eg 'ant' inside 'elephant'), or every
    # image when the root path itself happens to contain it.
    positive_name = os.path.basename(os.path.normpath(positive_folder))
    return [
        path
        for path in glob.glob(cal101_root + '/*/' + image_suffix)
        if os.path.basename(os.path.dirname(path)) != positive_name
    ]
def binary_labeled_img_from_cal101(positive_folder, cal101_root='101_ObjectCategories', image_suffix='*.jpg'):
    """
    Build a class-balanced set of labeled image paths from a directory tree
    that keeps each image category in its own folder.

    Every image in cal101_root/positive_folder is a positive. An equally sized
    sample of the remaining images is drawn without replacement
    (np.random.choice) to serve as negatives.

    Parameters:
    -----------
    positive_folder : str
        Name of the category folder holding the positive images.
    cal101_root : str
        Directory holding one sub-folder per category.
    image_suffix : str
        Glob pattern matched against file names inside each category folder.

    Returns:
    --------
    labeled_img_paths : numpy array built from [[image_path, label], ...]
        Positives (label True) come first, followed by the sampled negatives
        (label False).
        NOTE(review): np.array over mixed str/bool rows presumably stores the
        labels as the strings 'True'/'False' -- confirm before comparing them
        against booleans.
    """
    everything = set(glob.glob(cal101_root + '/*/' + image_suffix))
    positive_pattern = os.path.join(cal101_root, positive_folder) + '/' + image_suffix
    positives = set(glob.glob(positive_pattern))
    negatives = everything - positives

    # Draw exactly as many negatives as there are positives to keep the classes balanced.
    chosen_negatives = np.random.choice(list(negatives), size=len(positives), replace=False)

    summary = '%i positive, %i negative images selected (out of %i negatives total)'
    print(summary % (len(positives), len(chosen_negatives), len(negatives)))

    rows = [[path, True] for path in positives]
    rows.extend([path, False] for path in chosen_negatives)
    return np.array(rows)
def train_test_val_split_idxs(total_rows, percent_test, percent_val):
    """
    Get indexes for training, test, and validation rows, given a total number of rows.
    Assumes indexes are sequential integers starting at 0: eg [0,1,2,3,...N]

    Parameters:
    -----------
    total_rows : int
        Number of rows to split.
    percent_test : float
        Fraction of total_rows to put in the test set.
    percent_val : float
        Fraction of total_rows to put in the validation set.

    Returns:
    --------
    training_idxs, test_idxs, val_idxs
        training_idxs is a list of integers in ascending order.
        test_idxs and val_idxs are the arrays returned by np.random.choice
        (sampled without replacement). The three groups are disjoint.

    Raises:
    -------
    ValueError
        If percent_test + percent_val is 1.0 or more.
    """
    if percent_test + percent_val >= 1.0:
        raise ValueError('percent_test and percent_val must sum to less than 1.0')

    row_range = range(total_rows)

    no_test_rows = int(total_rows * percent_test)
    test_idxs = np.random.choice(row_range, size=no_test_rows, replace=False)
    # Membership is tested against a set: scanning the sampled array for every
    # row would make the filtering quadratic in total_rows.
    taken = set(test_idxs.tolist())
    row_range = [idx for idx in row_range if idx not in taken]

    # Validation rows are drawn only from what is left after removing the test rows.
    no_val_rows = int(total_rows * percent_val)
    val_idxs = np.random.choice(row_range, size=no_val_rows, replace=False)
    taken = set(val_idxs.tolist())
    training_idxs = [idx for idx in row_range if idx not in taken]

    print('Train-test-val split: %i training rows, %i test rows, %i validation rows' % (
        len(training_idxs), len(test_idxs), len(val_idxs)))

    return training_idxs, test_idxs, val_idxs
def gen_sift_features(labeled_img_paths):
    """
    Generate SIFT features for images

    Parameters:
    -----------
    labeled_img_paths : list of lists
        Of the form [[image_path, label], ...]

    Returns:
    --------
    img_descs : list of SIFT descriptors with same indicies as labeled_img_paths
        Each entry is whatever sift.detectAndCompute returned for that image.
        NOTE(review): OpenCV presumably returns None here for an image with no
        keypoints -- confirm, and make sure downstream code tolerates it.
    y : second column of np.array(labeled_img_paths), ie the corresponding labels
    """
    print('generating SIFT descriptors for %i images' % len(labeled_img_paths))

    # One extractor serves every image; constructing it inside the loop was
    # loop-invariant work repeated per image.
    sift = cv2.xfeatures2d.SIFT_create()

    img_descs = []
    for img_path, _label in labeled_img_paths:
        gray = cv2.cvtColor(read_image(img_path), cv2.COLOR_BGR2GRAY)
        # Only the descriptors are kept; the keypoints themselves are discarded.
        _keypoints, desc = sift.detectAndCompute(gray, None)
        img_descs.append(desc)

    print('SIFT descriptors generated.')

    y = np.array(labeled_img_paths)[:, 1]

    return img_descs, y
def cluster_features(img_descs, training_idxs, cluster_model):
    """
    Cluster the training features using the cluster_model
    and convert each set of descriptors in img_descs
    to a Visual Bag of Words histogram.

    Parameters:
    -----------
    img_descs : list of per-image SIFT descriptor sets
        One entry per image. An entry that is None or has no rows is treated
        as an image without descriptors.
    training_idxs : array/list of integers
        Indicies for the training rows in img_descs
    cluster_model : clustering model (eg KMeans from scikit-learn)
        The model used to cluster the SIFT features. Must expose n_clusters,
        fit and predict.

    Returns:
    --------
    X, cluster_model :
        X has one row per entry of img_descs and K feature columns, each
        column corresponding to a visual word. Images without descriptors get
        an all-zero row.
        cluster_model has been fit to the training set

    Raises:
    -------
    ValueError
        If the training images contain no descriptors at all, or if the
        descriptors do not have 128 features.
    """
    n_clusters = cluster_model.n_clusters

    # Stack every training image's descriptors into one 2-D array, skipping
    # images that produced none so they cannot break the stacking.
    training_descs = [img_descs[i] for i in training_idxs]
    training_descs = [desc for desc in training_descs if desc is not None and len(desc) > 0]
    if not training_descs:
        raise ValueError('No SIFT descriptors found in the training set')
    all_train_descriptors = np.vstack(training_descs)

    if all_train_descriptors.shape[1] != 128:
        raise ValueError(
            'Expected SIFT descriptors to have 128 features, got %i' % all_train_descriptors.shape[1])

    print('%i descriptors before clustering' % all_train_descriptors.shape[0])

    # Cluster descriptors to get codebook
    print('Using clustering model %s...' % repr(cluster_model))
    print('Clustering on training set to get codebook of %i words' % n_clusters)

    # Only the training descriptors are used to fit the codebook.
    cluster_model.fit(all_train_descriptors)

    print('done clustering. Using clustering model to generate BoW histograms for each image.')

    # Map each image's descriptors to visual words and count them. These
    # counts are the final features.
    histograms = []
    for raw_words in img_descs:
        if raw_words is None or len(raw_words) == 0:
            # No descriptors means no visual words: an empty histogram.
            histograms.append(np.zeros(n_clusters, dtype=np.intp))
        else:
            clustered_words = cluster_model.predict(raw_words)
            histograms.append(np.bincount(clustered_words, minlength=n_clusters))

    X = np.array(histograms)
    print('done generating BoW histograms.')

    return X, cluster_model
def perform_data_split(X, y, training_idxs, test_idxs, val_idxs):
    """
    Slice X and y into their training, test and validation subsets.

    Parameters:
    -----------
    X : indexable feature rows (eg, img_bow_hist)
    y : labels aligned row-for-row with X
    training_idxs : list/array of integers selecting the training rows
    test_idxs : same, for the test rows
    val_idxs : same, for the validation rows

    Returns:
    --------
    X_train, X_test, X_val, y_train, y_test, y_val
    """
    # The same three index groups are applied first to X, then to y.
    row_groups = (training_idxs, test_idxs, val_idxs)
    X_train, X_test, X_val = [X[rows] for rows in row_groups]
    y_train, y_test, y_val = [y[rows] for rows in row_groups]
    return X_train, X_test, X_val, y_train, y_test, y_val
def img_to_vect(img_path, cluster_model):
    """
    Given an image path and a trained clustering model (eg KMeans),
    generates a feature vector representing that image.
    Useful for processing new images for a classifier prediction.

    Parameters:
    -----------
    img_path : str
        Path of the image to convert.
    cluster_model : trained clustering model
        Must expose n_clusters and predict.

    Returns:
    --------
    Array of shape (1, cluster_model.n_clusters) holding the visual-word
    counts for the image. All zeros when the image yields no descriptors.
    """
    img = read_image(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    sift = cv2.xfeatures2d.SIFT_create()
    _keypoints, desc = sift.detectAndCompute(gray, None)

    n_clusters = cluster_model.n_clusters
    if desc is None or len(desc) == 0:
        # NOTE(review): OpenCV presumably hands back None when no keypoints
        # are found -- confirm. Without descriptors there are no visual words
        # to count, so use an empty histogram rather than calling predict.
        img_bow_hist = np.zeros(n_clusters, dtype=np.intp)
    else:
        clustered_desc = cluster_model.predict(desc)
        img_bow_hist = np.bincount(clustered_desc, minlength=n_clusters)

    # reshape to an array containing 1 array: array[[1,2,3]]
    # to make sklearn happy (it doesn't like 1d arrays as data!)
    return img_bow_hist.reshape(1, -1)