-
Notifications
You must be signed in to change notification settings - Fork 19
/
datasets.py
91 lines (68 loc) · 3.37 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import random
import numpy as np
import pandas as pd
from keras.applications import imagenet_utils
from scipy.misc.pilutil import imread
from params import args
from sklearn.model_selection import train_test_split
import sklearn.utils
from random_transform_mask import ImageWithMaskFunction
import os
def pad(image, padding_w, padding_h):
    """Surround every image of a batch with a zero-filled border.

    # Arguments
        image: 4-D array unpacked as (batch, height, width, depth).
        padding_w: number of columns added on the left and on the right.
        padding_h: number of rows added on the top and on the bottom.

    # Returns
        A newly allocated array with the dtype of `image` and shape
        (batch, height + 2 * padding_h, width + 2 * padding_w, depth);
        the input sits in the centre, the border is zeros.
    """
    n_images, rows, cols, channels = image.shape

    # @TODO: Avoid creating new array
    padded_shape = (n_images, rows + padding_h * 2, cols + padding_w * 2, channels)
    canvas = np.zeros(padded_shape, dtype=image.dtype)

    # Paste the original pixels into the centre of the enlarged canvas.
    row_span = slice(padding_h, rows + padding_h)
    col_span = slice(padding_w, cols + padding_w)
    canvas[:, row_span, col_span] = image

    # @TODO: Fill padded zones (currently left as zeros)
    return canvas
def unpad(image, padding_w):
    """Remove `padding_w` columns from both the left and right of a batch.

    This is the horizontal inverse of `pad`: the array is indexed as
    (batch, height, width, depth), so the crop is applied on axis 2.

    # Arguments
        image: 4-D array indexed as (batch, height, width, depth).
        padding_w: number of columns to strip from each side.

    # Returns
        A view of `image` with shape
        (batch, height, width - 2 * padding_w, depth).
    """
    # The end bound must come from the width axis (shape[2]); using the
    # height (shape[1]) crops non-square images to the wrong width.
    return image[:, :, padding_w:(image.shape[2] - padding_w), :]
def generate_filenames(car_ids):
    """Expand car ids into per-angle JPEG file names.

    Each id produces 16 names of the form `<id>_<NN>.jpg`, where NN is the
    zero-padded angle number from 01 to 16. Names are grouped by angle:
    every id for angle 01 comes first, then every id for angle 02, and so on.

    # Arguments
        car_ids: iterable of car identifiers.

    # Returns
        A list of file name strings.
    """
    filenames = []
    for angle_number in range(1, 17):
        suffix = str(angle_number).zfill(2)
        for car_id in car_ids:
            filenames.append('{}_{}.jpg'.format(car_id, suffix))
    return filenames
def bootstrapped_split(car_ids, seed=args.seed):
    """Split car ids into train/validation and bootstrap the training part.

    The ids are split with `train_test_split` (validation fraction taken from
    `args.test_size_float`). The training ids are then resampled with
    replacement to a sample of the same size, and both sets are expanded
    into image file names with `generate_filenames`.

    # Arguments
        car_ids: sequence of car identifiers.
        seed: seed used both for the split and for the bootstrap draw.

    # Returns
        A tuple (train_filenames, valid_filenames).
    """
    train_ids, valid_ids = train_test_split(car_ids, test_size=args.test_size_float,
                                            random_state=seed)
    # Integer-array indexing below needs an ndarray, whatever container
    # train_test_split handed back (list, Series, array).
    train_ids = np.asarray(train_ids)

    # Seeds NumPy's global RNG so the bootstrap draw is reproducible.
    np.random.seed(seed)
    n_train = len(train_ids)
    # Draw n_train positions with replacement. randint's upper bound is
    # exclusive, so every index is valid; the previous random_integers call
    # returned a single scalar and could land one past the end.
    bootstrapped_idx = np.random.randint(0, n_train, size=n_train)
    bootstrapped_train_ids = train_ids[bootstrapped_idx]

    return generate_filenames(bootstrapped_train_ids), generate_filenames(valid_ids)
def _load_stacked_image(filename, img_dir):
    """Read one image and append the configured extra channels along depth."""
    img = imread(os.path.join(img_dir, filename))
    # Extra channels are stored as <stacked_channels_dir>/<channel index>/<name>.png
    # and are read with mode='L'.
    png_name = filename.replace('.jpg', '.png')
    stacked_channels = [
        imread(os.path.join(args.stacked_channels_dir, str(i), png_name), mode='L')
        for i in range(args.stacked_channels)
    ]
    return np.dstack((img, *stacked_channels))


def build_batch_generator(filenames, img_dir=None, batch_size=None,
                          shuffle=False, transformations=None,
                          out_size=None, crop_size=None, mask_dir=None, aug=False):
    """Endlessly yield (inputs, masks) batches built from image files.

    # Arguments
        filenames: sequence of image file names located in `img_dir`.
        img_dir: directory the images are read from.
        batch_size: maximum number of images per batch; the last batch of a
            pass may be smaller.
        shuffle: if true, reshuffle `filenames` at the start of every pass.
        transformations: accepted for interface compatibility; not used here.
        out_size, crop_size, mask_dir: forwarded to `ImageWithMaskFunction`.
            When `crop_size` is None the batch and masks are additionally
            zero-padded by one column on each side.
        aug: forwarded to `mask_function.mask_pred`.

    # Yields
        A tuple (batch_x, masks), where batch_x has been passed through
        `imagenet_utils.preprocess_input` with
        `mode=args.preprocessing_function`.

    # Raises
        ValueError: if `filenames` is empty (the generator would otherwise
            loop forever without yielding anything).
    """
    if len(filenames) == 0:
        raise ValueError('build_batch_generator needs at least one filename')

    mask_function = ImageWithMaskFunction(out_size=out_size, crop_size=crop_size, mask_dir=mask_dir)

    while True:
        # @TODO: Should we fixate the seed here?
        if shuffle:
            filenames = sklearn.utils.shuffle(filenames)

        for start in range(0, len(filenames), batch_size):
            end = min(start + batch_size, len(filenames))
            train_batch = filenames[start:end]

            batch_x = np.array(
                [_load_stacked_image(filename, img_dir) for filename in train_batch],
                np.float32)

            # Pass indices for the images actually in this batch; the final
            # batch of a pass can hold fewer than batch_size items.
            # NOTE(review): assumes mask_pred treats these as positions
            # within `train_batch` -- confirm against ImageWithMaskFunction.
            batch_x, masks = mask_function.mask_pred(
                batch_x, train_batch, range(len(train_batch)), aug)

            if crop_size is None:
                # @TODO: Remove hardcoded padding
                batch_x, masks = pad(batch_x, 1, 0), pad(masks, 1, 0)

            yield imagenet_utils.preprocess_input(batch_x, mode=args.preprocessing_function), masks