-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter_classes.py
47 lines (31 loc) · 1.19 KB
/
filter_classes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# coding=utf-8
# Takes a directory of training sets and a target (output) directory.
# Groups every class that is not ceph or rrlyr into a single group.
# -----------------------------------------------------------------------------
import os
import sys
import argparse
import pandas as pd
def filter_class(lc_class):
    """Collapse a class label into one of 'ceph', 'rrlyr' or 'other'.

    Args:
        lc_class: The original class label. Matching is exact and
            case-sensitive against the label spellings listed below.

    Returns:
        'ceph' if the label is one of the cepheid spellings, 'rrlyr' if it
        is one of the rrlyr spellings, and 'other' for anything else.
    """
    # Tuples (not sets) so that membership relies on equality only and any
    # value a DataFrame column may hold (e.g. NaN) simply maps to 'other'.
    cepheids = ('CEPH', 'Ceph_10', 'Ceph_10_20', 'Ceph_F', 'T2CEPH', 'cep',
                't2cep')
    rrlyr = ('RRL', 'rrlyr')

    if lc_class in cepheids:
        return 'ceph'
    if lc_class in rrlyr:
        return 'rrlyr'
    return 'other'
if __name__ == '__main__':
    # Echo the command line that was used. The parenthesised single-argument
    # form behaves the same on Python 2 and Python 3.
    print(' '.join(sys.argv))

    parser = argparse.ArgumentParser()
    parser.add_argument('--datasets_dir', required=True, type=str)
    parser.add_argument('--result_dir', required=True, type=str)
    args = parser.parse_args(sys.argv[1:])

    datasets_dir = args.datasets_dir
    result_dir = args.result_dir

    # Make sure the output directory exists before writing into it.
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    # Only regular files directly inside datasets_dir are processed;
    # sub-directories are skipped.
    files = [f for f in os.listdir(datasets_dir)
             if os.path.isfile(os.path.join(datasets_dir, f))]

    for f in files:
        # The first CSV column is used as the index and is written back out
        # as the first column by to_csv.
        df = pd.read_csv(os.path.join(datasets_dir, f), index_col=0)
        df['class'] = df['class'].apply(filter_class)
        # Join the path properly so --result_dir works with or without a
        # trailing separator (plain concatenation wrote to the wrong path).
        df.to_csv(os.path.join(result_dir, f))