-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprepare_data.py
118 lines (101 loc) · 4.99 KB
/
prepare_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import argparse
import os
import shutil
import csv
from PIL import Image
import numpy as np
from tqdm import tqdm
def _mkdirs_if_not_exists(path):
    """Create directory ``path``, including missing parents, if it is absent.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race: a separate
    # os.path.exists() test can go stale if another process creates the
    # directory before os.makedirs() runs.
    os.makedirs(path, exist_ok=True)
# --- Command-line interface -------------------------------------------------
parser = argparse.ArgumentParser(description='Data preparation')
parser.add_argument('--split-type', default='1cls', type=str,
                    help='1cls, 2cls_highshot, 2cls_fewshot')
parser.add_argument('--data-folder', default='./VisA/', type=str,
                    help='the path to downloaded VisA dataset')
parser.add_argument('--save-folder', default='./VisA_pytorch/', type=str,
                    help='the target path to save the reorganized VisA dataset facilitating data loading in pytorch')
parser.add_argument('--split-file', default='./split_csv/1cls.csv', type=str,
                    help='the csv file to split downloaded VisA dataset')
config = parser.parse_args()

split_type = config.split_type
split_file = config.split_file
data_folder = config.data_folder
# Each split type is written to its own sub-folder of --save-folder.
save_folder = os.path.join(config.save_folder, split_type)

# Object categories whose output folders are created up front.
data_list = ['pcb1', 'pcb2', 'pcb3', 'pcb4', 'pcb5', 'pcb6', 'pcb7']


def _create_output_folders(root, categories, one_class):
    """Create the destination folder tree for every category.

    Args:
        root: Output root; one sub-folder is created per category.
        categories: Iterable of category folder names.
        one_class: True for the '1cls' layout (no train/bad images, masks
            stored directly under ground_truth/bad); False for the layout
            that also has train/bad and splits masks by train/test.
    """
    if one_class:
        leaves = (
            ('train', 'good'),
            ('test', 'good'),
            ('test', 'bad'),
            ('ground_truth', 'bad'),
        )
    else:
        leaves = (
            ('train', 'good'),
            ('train', 'bad'),
            ('test', 'good'),
            ('test', 'bad'),
            ('ground_truth', 'train', 'bad'),
            ('ground_truth', 'test', 'bad'),
        )
    for category in categories:
        for parts in leaves:
            _mkdirs_if_not_exists(os.path.join(root, category, *parts))


def _save_binary_mask(src_path, dst_path):
    """Load a mask image, set every non-zero pixel to 255, and save it.

    Args:
        src_path: Path of the source mask image.
        dst_path: Path the binarized mask is written to.
    """
    # The context manager releases the source file handle as soon as the
    # pixel data has been copied into the array.
    with Image.open(src_path) as mask:
        mask_array = np.array(mask)
    mask_array[mask_array != 0] = 255
    Image.fromarray(mask_array).save(dst_path)


def _reorganize(csv_path, src_root, dst_root, one_class):
    """Copy images and binarized masks into the layout described by the CSV.

    Every data row must have five columns: object, split, label, image path
    and mask path (paths relative to ``src_root``, '/'-separated). A label of
    'normal' maps to the 'good' folder; any other label maps to 'bad'.

    Args:
        csv_path: Split CSV file; its first row is treated as a header.
        src_root: Root folder of the downloaded dataset.
        dst_root: Root folder of the reorganized dataset.
        one_class: True for the '1cls' layout, where masks are saved only for
            bad test images and are not separated by split.

    Raises:
        ValueError: If a non-empty row does not have exactly five columns.
    """
    # NOTE(review): destination folders are only pre-created for the entries
    # of data_list; a row naming any other object makes shutil.copyfile fail
    # unless that folder already exists. Confirm the CSV matches data_list.
    # newline='' is what the csv module documentation asks for.
    with open(csv_path, 'r', newline='') as handle:
        rows = csv.reader(handle)
        next(rows, None)  # skip the header; tolerate an empty file
        for row in rows:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            category, split, label, image_path, mask_path = row
            label = 'good' if label == 'normal' else 'bad'

            image_name = image_path.split('/')[-1]
            img_src_path = os.path.join(src_root, image_path)
            img_dst_path = os.path.join(dst_root, category, split, label, image_name)
            shutil.copyfile(img_src_path, img_dst_path)

            if label != 'bad':
                continue
            if one_class:
                if split != 'test':
                    continue
                mask_dir = os.path.join(dst_root, category, 'ground_truth', label)
            else:
                mask_dir = os.path.join(dst_root, category, 'ground_truth', split, label)

            mask_name = mask_path.split('/')[-1]
            _save_binary_mask(os.path.join(src_root, mask_path),
                              os.path.join(mask_dir, mask_name))


# --- Script body: build the folder tree, then populate it -------------------
_create_output_folders(save_folder, data_list, split_type == '1cls')
_reorganize(split_file, data_folder, save_folder, split_type == '1cls')