preprocess.py
import os
import shutil
from pathlib import Path, PurePath
from PIL import Image
from sklearn.model_selection import train_test_split
from util import RAW_PATH, TRAIN_PATH, TEST_PATH, clean_mkdir, ROWS, COLS, IMAGES_PATH
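
# The constants and helper imported from this repo's util module are described
# here based on how they are used below (an inference, not taken from util.py):
#   RAW_PATH / TRAIN_PATH / TEST_PATH / IMAGES_PATH -- subdirectory names that
#       get joined onto the dataset root directory.
#   ROWS, COLS -- integer height and width of the patches fed to the model.
#   clean_mkdir(path) -- creates the directory, presumably clearing any
#       previous contents first.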


def generate_samples(image_path, data_path):
    """
    Split a larger image into correctly sized chunks for the model.

    Each chunk is saved four times: once as cropped, and once for each of
    three additional 90-degree rotations.

    Args:
        image_path: Path to the source image to tile.
        data_path: Dataset root; chunks are written to its RAW_PATH subdirectory.
    """
    output_path = str(Path(data_path) / RAW_PATH)
    clean_mkdir(output_path)
    filename = PurePath(image_path).stem
    img_src = Image.open(image_path)
    rows = img_src.height
    cols = img_src.width
    count = 0
    # iterate starting X with a half-patch stride (range() needs an integer step)
    for i in range(0, cols - COLS - 1, COLS // 2):
        # iterate starting Y with a half-patch stride
        for j in range(0, rows - ROWS - 1, ROWS // 2):
            # PIL crop boxes are (left, upper, right, lower), so the patch
            # spans COLS horizontally and ROWS vertically
            img_out = img_src.crop((i, j, i + COLS, j + ROWS))
            img_out.save("{}/{}_{:05d}.jpg".format(output_path, filename, count))
            count += 1
            # rotate() keeps the original size (expand=False), so the extra
            # rotations are only lossless for square patches
            img_out = img_out.rotate(90)
            img_out.save("{}/{}_{:05d}.jpg".format(output_path, filename, count))
            count += 1
            img_out = img_out.rotate(90)
            img_out.save("{}/{}_{:05d}.jpg".format(output_path, filename, count))
            count += 1
            img_out = img_out.rotate(90)
            img_out.save("{}/{}_{:05d}.jpg".format(output_path, filename, count))
            count += 1
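

# Illustrative arithmetic for the tiling above (example numbers, not values
# from this repo): with ROWS = COLS = 256 and a 1024x1024 source image, the
# half-patch stride of 128 yields offsets 0, 128, ..., 640 on each axis (the
# exclusive range bound skips the final full window at offset 768), i.e.
# 6 x 6 = 36 crop positions, and the three extra rotations bring that to
# 144 saved patches per source image.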


def split_sets(data_path: Path):
    """
    Randomly split the raw patches into training and test sets (80/20).

    Args:
        data_path: Dataset root containing the RAW_PATH directory of patches.
    """
    input_path = data_path / RAW_PATH
    train_path = data_path / TRAIN_PATH
    test_path = data_path / TEST_PATH
    clean_mkdir(str(train_path))
    clean_mkdir(str(test_path))
    # collect every generated .jpg patch
    filenames = [f for f in os.listdir(str(input_path)) if f.endswith(".jpg")]
    # no random_state is set, so the split differs between runs
    train_filenames, test_filenames = train_test_split(
        filenames, train_size=0.8, test_size=0.2
    )
    for filename in train_filenames:
        shutil.copyfile(str(input_path / filename), str(train_path / filename))
    for filename in test_filenames:
        shutil.copyfile(str(input_path / filename), str(test_path / filename))


def generate_dirty(data_path: Path):
    """
    Generate the X (input) values by downsampling the clean label imagery.

    Each label patch is shrunk to half size and resized back up, producing a
    blurred version for the model to learn to restore.

    Args:
        data_path: Dataset root containing the train_labels and test_labels directories.
    """
    input_train_path = data_path / "train_labels"
    input_test_path = data_path / "test_labels"
    output_train_path = data_path / "train"
    output_test_path = data_path / "test"
    clean_mkdir(str(output_train_path))
    clean_mkdir(str(output_test_path))
    for file in os.listdir(str(input_train_path)):
        img = Image.open(str(input_train_path / file))
        # PIL resize sizes are (width, height): downsample by 2x, then upsample back
        temp = img.resize((COLS // 2, ROWS // 2), Image.BILINEAR)
        temp = temp.resize((COLS, ROWS), Image.BILINEAR)
        temp.save(str(output_train_path / file))
    for file in os.listdir(str(input_test_path)):
        img = Image.open(str(input_test_path / file))
        temp = img.resize((COLS // 2, ROWS // 2), Image.BILINEAR)
        temp = temp.resize((COLS, ROWS), Image.BILINEAR)
        temp.save(str(output_test_path / file))


def preprocess_dataset(data_path):
    """
    Run the full pipeline: tile the source images, split the patches into
    train/test sets, then generate the downsampled inputs.
    """
    data_path = Path(data_path)
    images_path = data_path / IMAGES_PATH
    for filename in os.listdir(str(images_path)):
        generate_samples(str(images_path / filename), data_path)
    split_sets(data_path)
    generate_dirty(data_path)
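

# Minimal usage sketch (not part of the original module); the "data" default
# below is a placeholder path, not something defined by this repo.
if __name__ == "__main__":
    import sys

    # e.g. python preprocess.py path/to/dataset
    preprocess_dataset(sys.argv[1] if len(sys.argv) > 1 else "data")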