-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathactionCLSS_dataset_partitioned.py
276 lines (244 loc) · 13.6 KB
/
actionCLSS_dataset_partitioned.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
import matplotlib
matplotlib.use('TkAgg')
import utils
import random
import glob, os
# import math
import cv2
import csv
# from PIL import Image, ImageFont, ImageDraw, ImageEnhance
import numpy as np
import matplotlib.pyplot as plt
# import matplotlib.patches as patches
# import matplotlib.lines as lines
# from matplotlib.patches import Polygon
# import IPython.display
class ShapesDatasetPartitioned(utils.Dataset):
"""Generates the shapes synthetic dataset. The dataset consists of simple
shapes (triangles, squares, circles) placed randomly on a blank surface.
The images are generated on the fly. No file access required.
"""
with open('validationList.txt') as file:
reserved_for_val = [video.replace('\n', '') for video in file.readlines()]
with open('testList.txt') as file:
reserved_for_test = [video.replace('\n', '') for video in file.readlines()]
def load_train_shapes(self):
# Training is True if the dataset is for training, False if it's for validation set
"""Generate the requested number of synthetic images.
count: number of images to generate.
height, width: the size of the generated images.
"""
# Add classes
self.add_class("sport", 1, "WalkingWithDog")
self.add_class("sport", 2, "BasketballDunk")
self.add_class("sport", 3, "Biking")
self.add_class("sport", 4, "CliffDiving")
self.add_class("sport", 5, "CricketBowling")
self.add_class("sport", 6, "Diving")
self.add_class("sport", 7, "Fencing")
self.add_class("sport", 8, "FloorGymnastics")
self.add_class("sport", 9, "GolfSwing")
self.add_class("sport", 10, "HorseRiding")
self.add_class("sport", 11, "IceDancing")
self.add_class("sport", 12, "LongJump")
self.add_class("sport", 13, "PoleVault")
self.add_class("sport", 14, "RopeClimbing")
self.add_class("sport", 15, "SalsaSpin")
self.add_class("sport", 16, "SkateBoarding")
self.add_class("sport", 17, "Skiing")
self.add_class("sport", 18, "Skijet")
self.add_class("sport", 19, "SoccerJuggling")
self.add_class("sport", 20, "Surfing")
self.add_class("sport", 21, "TennisSwing")
self.add_class("sport", 22, "TrampolineJumping")
self.add_class("sport", 23, "VolleyballSpiking")
self.add_class("sport", 24, "Basketball")
# Select ALL IMAGES
root_labels = 'ucf24_project/labels'
root_images = 'ucf24_project/rgb-images'
for activity in [act['name'] for act in self.class_info if not act['name'] == 'BackGround']:
for video in [vid for vid in os.listdir(root_labels + '/' + activity) if
not vid.endswith('.DS_Store')
and vid not in self.reserved_for_test
and vid not in self.reserved_for_val]:
for frame in [f for f in os.listdir(root_labels + '/' + activity + '/' + video) if
f.endswith('.txt')]:
URL_label = root_labels + '/' + activity + '/' + video + '/' + frame
URL_image = root_images + '/' + activity + '/' + video + '/' + frame.replace('.txt', '.jpg')
# open the file with the labels (class and bbox)
with open(URL_label) as file:
lines = file.readlines()
bounding_boxes = [str(line).replace('\n', '').split(' ') for line in lines]
# Orrible cast from string matrix to int matrix
for i in range(len(bounding_boxes)):
bounding_boxes[i] = [int(float(element)) for element in bounding_boxes[i]]
# add image
self.add_image("sport", image_id=i, path=URL_image,
width=320, height=240,
bbox=bounding_boxes, action=activity)
def load_val_shapes(self):
# Training is True if the dataset is for training, False if it's for validation set
"""Generate the requested number of synthetic images.
count: number of images to generate.
height, width: the size of the generated images.
"""
# Add classes
self.add_class("sport", 1, "WalkingWithDog")
self.add_class("sport", 2, "BasketballDunk")
self.add_class("sport", 3, "Biking")
self.add_class("sport", 4, "CliffDiving")
self.add_class("sport", 5, "CricketBowling")
self.add_class("sport", 6, "Diving")
self.add_class("sport", 7, "Fencing")
self.add_class("sport", 8, "FloorGymnastics")
self.add_class("sport", 9, "GolfSwing")
self.add_class("sport", 10, "HorseRiding")
self.add_class("sport", 11, "IceDancing")
self.add_class("sport", 12, "LongJump")
self.add_class("sport", 13, "PoleVault")
self.add_class("sport", 14, "RopeClimbing")
self.add_class("sport", 15, "SalsaSpin")
self.add_class("sport", 16, "SkateBoarding")
self.add_class("sport", 17, "Skiing")
self.add_class("sport", 18, "Skijet")
self.add_class("sport", 19, "SoccerJuggling")
self.add_class("sport", 20, "Surfing")
self.add_class("sport", 21, "TennisSwing")
self.add_class("sport", 22, "TrampolineJumping")
self.add_class("sport", 23, "VolleyballSpiking")
self.add_class("sport", 24, "Basketball")
# Select ALL IMAGES
root_labels = 'ucf24_project/labels'
root_images = 'ucf24_project/rgb-images'
for activity in [act['name'] for act in self.class_info if not act['name'] == 'BackGround']:
for video in [vid for vid in os.listdir(root_labels + '/' + activity) if
not vid.endswith('.DS_Store')
and vid in self.reserved_for_val]:
for frame in [f for f in os.listdir(root_labels + '/' + activity + '/' + video) if
f.endswith('.txt')]:
URL_label = root_labels + '/' + activity + '/' + video + '/' + frame
URL_image = root_images + '/' + activity + '/' + video + '/' + frame.replace('.txt', '.jpg')
# open the file with the labels (class and bbox)
with open(URL_label) as file:
lines = file.readlines()
bounding_boxes = [str(line).replace('\n', '').split(' ') for line in lines]
# Orrible cast from string matrix to int matrix
for i in range(len(bounding_boxes)):
bounding_boxes[i] = [int(float(element)) for element in bounding_boxes[i]]
# add image
self.add_image("sport", image_id=i, path=URL_image,
width=320, height=240,
bbox=bounding_boxes, action=activity)
def load_test_shapes(self):
# Training is True if the dataset is for training, False if it's for validation set
"""Generate the requested number of synthetic images.
count: number of images to generate.
height, width: the size of the generated images.
"""
# Add classes
self.add_class("sport", 1, "WalkingWithDog")
self.add_class("sport", 2, "BasketballDunk")
self.add_class("sport", 3, "Biking")
self.add_class("sport", 4, "CliffDiving")
self.add_class("sport", 5, "CricketBowling")
self.add_class("sport", 6, "Diving")
self.add_class("sport", 7, "Fencing")
self.add_class("sport", 8, "FloorGymnastics")
self.add_class("sport", 9, "GolfSwing")
self.add_class("sport", 10, "HorseRiding")
self.add_class("sport", 11, "IceDancing")
self.add_class("sport", 12, "LongJump")
self.add_class("sport", 13, "PoleVault")
self.add_class("sport", 14, "RopeClimbing")
self.add_class("sport", 15, "SalsaSpin")
self.add_class("sport", 16, "SkateBoarding")
self.add_class("sport", 17, "Skiing")
self.add_class("sport", 18, "Skijet")
self.add_class("sport", 19, "SoccerJuggling")
self.add_class("sport", 20, "Surfing")
self.add_class("sport", 21, "TennisSwing")
self.add_class("sport", 22, "TrampolineJumping")
self.add_class("sport", 23, "VolleyballSpiking")
self.add_class("sport", 24, "Basketball")
# Select ALL IMAGES
root_labels = 'ucf24_project/labels'
root_images = 'ucf24_project/rgb-images'
for activity in [act['name'] for act in self.class_info if not act['name'] == 'BackGround']:
for video in [vid for vid in os.listdir(root_labels + '/' + activity) if
not vid.endswith('.DS_Store')
and vid in self.reserved_for_test]:
for frame in [f for f in os.listdir(root_labels + '/' + activity + '/' + video) if
f.endswith('.txt')]:
URL_label = root_labels + '/' + activity + '/' + video + '/' + frame
URL_image = root_images + '/' + activity + '/' + video + '/' + frame.replace('.txt', '.jpg')
# open the file with the labels (class and bbox)
with open(URL_label) as file:
lines = file.readlines()
bounding_boxes = [str(line).replace('\n', '').split(' ') for line in lines]
# Orrible cast from string matrix to int matrix
for i in range(len(bounding_boxes)):
bounding_boxes[i] = [int(float(element)) for element in bounding_boxes[i]]
# add image
self.add_image("sport", image_id=i, path=URL_image,
width=320, height=240,
bbox=bounding_boxes, action=activity)
def load_image(self, image_id):
info = self.image_info[image_id]
image = plt.imread(info['path'])
return image
def image_reference(self, image_id):
"""Return the shapes data of the image."""
info = self.image_info[image_id]
if info["source"] == "shapes":
return info["shapes"]
else:
super(self.__class__).image_reference(self, image_id)
def load_mask(self, image_id):
"""Generate instance masks for shapes of the given image ID.
"""
info = self.image_info[image_id]
shapes = info['bbox']
masks_path = info['path'].replace(".jpg", "mask_").replace("rgb-images", "labels")
count = len(shapes)
# si crea una matrice 3D per ogni bbox. La terza dimensione è il numero di bounding.
# ogni maschera è un area di 0 su un blocco di 1 grosso come l'immagine (320x240)
mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
for i, (action, x1, y1, x2, y2) in enumerate(info['bbox']):
mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(), x1, y1, x2, y2, 1).reshape(240, 320, 1)
# se ci sono le maschere...
if os.path.exists(masks_path + "0.jpg"):
mask_counter, intersection_max = 0, -1
# per ogni maschera
while os.path.exists(masks_path + str(mask_counter) + ".jpg"):
mask_counter = mask_counter + 1
# leggo la maschera, essendo in jpg è compressa, ci sta che non tutti i valori siano 255
# uso ">128" per filtrarla -> ottengo una matrice binaria
current_mask = plt.imread(masks_path + "0.jpg") > 128
# Calcolo l'intersezione col boundingbox e la percentuale di maschera dentro questo
intersection = mask[:, :, i:i + 1] * current_mask.reshape(240, 320, 1)
# the factors *1 and /255 needed in order to had a normalized score
intersection_score = sum(sum(intersection / 255)) / sum(sum(current_mask * 1))
# Se ho un nuovo massimo, metto da parte l'intersezione trovata
if intersection_score > intersection_max:
intersection_max = intersection_score
final_mask = current_mask
# Se ho avuto almeno un intersezione con il bbox
if intersection_max > 0.1:
# uncomment to save a mask, but seems good
# print(intersection_max)
aMask = mask[:, :, i:i + 1] * final_mask.reshape(240, 320, 1)
# plt.imsave("imgSaved/"+str(intersection_max)+"m.png", aMask.reshape(240, 320))
# plt.imsave("imgSaved/"+str(intersection_max)+"i.png", plt.imread(info['path']))
# plt.imsave("imgSaved/"+str(intersection_max)+"b.png", mask[:, :, i:i+1].reshape(240, 320))
# aggiorno la maschera
mask[:, :, i:i + 1] = mask[:, :, i:i + 1] * final_mask.reshape(240, 320, 1)
# prima poteva accadere che due maschere in scena fossero di classi diverse (e.g. tondo e quadrato)
# ora avremo che le maschere in scena fanno tutte parte della stessa attività
class_ids = np.array([self.class_names.index(info['action']) for i in range(count)])
return mask, class_ids.astype(np.int32)
def draw_shape(self, image, x1, y1, x2, y2, color):
"""Draws a shape from the given specs."""
# Get the center x, y and the size s
cv2.rectangle(image, (x1, y1), (x2, y2), 255, -1)
image = image.reshape(240, 320)
return image