perception.py
import numpy as np
import pandas as pd
import os
import dlib
import cv2
from sklearn.metrics.pairwise import cosine_similarity
import torch
from torch import nn
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms as tfms
from PIL import Image
from tqdm import tqdm


def train_detector(train_data, filename='models/detector.svm'):
    '''Trains an object detector (HOG + SVM) and saves the model'''
    # Separate the images and bounding boxes into different lists.
    images = [val[0] for val in train_data.values()]
    bounding_boxes = [val[1] for val in train_data.values()]
    # Initialize object detector options
    options = dlib.simple_object_detector_training_options()
    options.add_left_right_image_flips = False
    options.C = 5
    # Train the model
    detector = dlib.train_simple_object_detector(images, bounding_boxes, options)
    # Check results
    results = dlib.test_simple_object_detector(images, bounding_boxes, detector)
    print(f'Training Results: {results}')
    # Save model
    detector.save(filename)
    print(f'Saved the model to {filename}')
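

# Usage sketch for train_detector (illustrative only): the directory layout,
# file names and box coordinates below are hypothetical assumptions, not part
# of this module. dlib expects images as numpy arrays and one list of
# dlib.rectangle boxes per image.
def _example_train_detector():
    train_data = {
        'scene_0.png': (dlib.load_rgb_image('data/scene_0.png'),
                        [dlib.rectangle(left=10, top=10, right=50, bottom=50)]),
        'scene_1.png': (dlib.load_rgb_image('data/scene_1.png'),
                        [dlib.rectangle(left=60, top=20, right=100, bottom=60)]),
    }
    train_detector(train_data, filename='models/detector.svm')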


class ShapeClassifier(nn.Module):
    '''Simple CNN based Image Classifier for Shapes (circle | rectangle)'''

    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(nn.Conv2d(1, 28, 3, stride=2, padding=1),
                                  nn.BatchNorm2d(28),
                                  nn.Conv2d(28, 28, 3, stride=2, padding=1),
                                  nn.BatchNorm2d(28),
                                  nn.Conv2d(28, 28, 3, stride=2, padding=1),
                                  nn.BatchNorm2d(28))
        self.fc = nn.Linear(700, 1)

    def forward(self, x):
        '''Forward Pass'''
        # batch_size (N)
        N = x.size()[0]
        # Extract features with CNN
        x = self.conv(x)
        # Classifier head
        x = self.fc(x.reshape(N, -1))
        return x

    def train_classifier(self, train_loader, lr=0.0001, epochs=10, filename='models/classifier.pth', device=None):
        '''Train the shape classifier'''
        # Automatically set device if not provided
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # Mount to device
        self.to(device)
        # Create optimizer and loss function
        optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        criterion = nn.BCEWithLogitsLoss()
        self.train()
        # Start Training
        for epoch in range(epochs):
            pbar = tqdm(total=len(train_loader), desc='Epoch {}'.format(epoch+1))
            losses = []
            for i, (image, label) in enumerate(train_loader):
                # Mount to device
                image, label = image.to(device).float(), label.to(device)
                # Forward prop
                out = self(image)
                # Loss
                loss = criterion(out.squeeze(1), label.float())
                # Backprop and Optimization
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Verbose
                losses.append(loss.item())
                pbar.update(1)
                pbar.set_postfix({'Loss': loss.item()})
            print(f'Epoch {epoch+1}: Mean Loss = {sum(losses)/len(losses)}')
            pbar.close()
        # Save model
        torch.save(self.state_dict(), filename)


class Binarize(object):
    '''Converts Grayscale to Binary (every color except white is zeroed)'''

    def __call__(self, img_tensor):
        '''
        Args:
            img_tensor (tensor): 0-1 scaled tensor with 1 channel
        Returns:
            tensor
        '''
        return (img_tensor > 0.95).float()
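

# Usage sketch for training the shape classifier (illustrative only): the
# 'data/shapes' directory with one sub-folder per class is a hypothetical
# assumption. It reuses the same Grayscale/Resize/Binarize preprocessing that
# PerceptionPipe applies at inference time.
def _example_train_classifier():
    transform = tfms.Compose([tfms.Grayscale(),
                              tfms.Resize((40, 40)),
                              tfms.ToTensor(),
                              Binarize()])
    # ImageFolder maps sub-folders (e.g. circle/, rectangle/) to labels 0 and 1
    dataset = ImageFolder('data/shapes', transform=transform)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    classifier = ShapeClassifier()
    classifier.train_classifier(loader, lr=1e-4, epochs=10,
                                filename='models/classifier.pth')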


class PerceptionPipe():
    '''
    Full Perception Pipeline i.e.
    detector -> attribute extraction -> structural scene representation
    '''

    def __init__(self, detector_file, classifier_file, device='cpu'):
        # Object detector
        self.detector = dlib.simple_object_detector(detector_file)
        # Shape Classifier
        self.classifier = ShapeClassifier().to(device)
        self.classifier.load_state_dict(torch.load(classifier_file))
        self.device = device
        # Reference colors (BGR) and their names
        self.colors = np.array([[0, 0, 255], [0, 255, 0], [255, 0, 0],
                                [0, 156, 255], [128, 128, 128], [0, 255, 255]])
        self.idx2color = {0: 'red', 1: 'green', 2: 'blue', 3: 'orange', 4: 'gray', 5: 'yellow'}
        self.preproc = tfms.Compose([tfms.Grayscale(),
                                     tfms.Resize((40, 40)),
                                     tfms.ToTensor(),
                                     Binarize()])

    def detect(self, img):
        '''Detects and returns objects and their centers'''
        # Detect
        detections = self.detector(img)
        objects = []
        for detection in detections:
            # Get the bbox coords
            x1, y1 = int(detection.left()), int(detection.top())
            x2, y2 = int(detection.right()), int(detection.bottom())
            # Clip negative values to zero
            x1, y1, x2, y2 = np.array([x1, y1, x2, y2]).clip(min=0).tolist()
            # Find the center
            center = (int((x1+x2)/2), int((y1+y2)/2))
            # Crop the individual object
            obj = img[y1:y2, x1:x2]
            objects.append((obj, center))
        return objects

    def extract_attributes(self, x_img, prob=0.5, debug=False):
        '''Returns the shape and color of a given object'''
        # Load image as PIL instance (color image)
        image = Image.fromarray(cv2.cvtColor(x_img, cv2.COLOR_BGR2RGB))
        # Preprocess (binarized image)
        img = self.preproc(image).unsqueeze(0).to(self.device)
        # Predict Shape
        with torch.no_grad():
            out = torch.sigmoid(self.classifier(img)).squeeze()
        if debug:
            print(out)
        if out < prob:
            shape = 'circle'
        else:
            shape = 'rectangle'
        # Extract Color: match the pixel near the crop's center against the reference colors
        center_pixel = (x_img[20, 20, :]).astype('int')
        color_id = cosine_similarity(center_pixel.reshape(1, -1), self.colors).argmax()
        color = self.idx2color[color_id]
        # print(center_pixel)
        # print(color_id)
        return shape, color

    def scene_repr(self, img, prob=0.5, debug=False):
        '''Returns a structured scene representation as a dataframe'''
        # Perform object detection and get the objects
        objects = self.detect(img)
        # Init Scene representation
        scene_df = pd.DataFrame(columns=['shape', 'color', 'position'])
        for obj, center in objects:
            shape, color = self.extract_attributes(obj, prob, debug)
            scene_df = pd.concat([scene_df, pd.DataFrame([{'shape': shape,
                                                           'color': color,
                                                           'position': center}])],
                                 ignore_index=True)
        return scene_df
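

# End-to-end usage sketch (illustrative only): the model paths and the input
# image 'scene.png' are hypothetical assumptions. The pipeline expects a BGR
# image as produced by cv2.imread.
if __name__ == '__main__':
    pipe = PerceptionPipe('models/detector.svm', 'models/classifier.pth', device='cpu')
    scene = cv2.imread('scene.png')
    scene_df = pipe.scene_repr(scene)
    print(scene_df)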