-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrain_test_split.py
27 lines (26 loc) · 1.03 KB
/
train_test_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from sklearn.model_selection import train_test_split
import pandas as pd
import os
def split_train_val(data_dir='/data/personal_folders/skolchenko/panda',
                    test_size=0.25,
                    random_state=42):
    """Split the labelled images that have a mask file into train/val CSVs.

    Reads ``train.csv`` from *data_dir*, keeps only the rows whose
    ``<image_id>_mask.tiff`` file exists under ``train_label_masks``, splits
    them with ``train_test_split`` and writes the two parts to
    ``data_train.csv`` and ``data_val.csv`` in *data_dir* (index column
    omitted).

    Args:
        data_dir: Directory holding ``train.csv`` and the
            ``train_label_masks`` folder; the output CSVs are written here
            as well.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``
            so the split is reproducible.

    Returns:
        The ``(data_train, data_val)`` pair produced by ``train_test_split``.
    """
    mask_dir = os.path.join(data_dir, 'train_label_masks')
    labels = pd.read_csv(os.path.join(data_dir, 'train.csv'))

    # The column keeps its original name 'has_slide' so the schema of the
    # written CSVs does not change; what it records is whether the mask
    # file for the row's image_id is present on disk.
    labels['has_slide'] = [
        os.path.isfile(os.path.join(mask_dir, f'{image_id}_mask.tiff'))
        for image_id in labels['image_id'].values
    ]
    labels = labels.loc[labels['has_slide'], :]

    data_train, data_val = train_test_split(
        labels,
        test_size=test_size,
        random_state=random_state)

    data_train.to_csv(os.path.join(data_dir, 'data_train.csv'), index=False)
    data_val.to_csv(os.path.join(data_dir, 'data_val.csv'), index=False)
    return data_train, data_val


if __name__ == '__main__':
    split_train_val()