-
Notifications
You must be signed in to change notification settings - Fork 2
/
prepare_merged_csv.py
executable file
·142 lines (122 loc) · 4.14 KB
/
prepare_merged_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python2
import os, sys
import operator as op
import numpy as np
import pandas as pd
# --- Run configuration -------------------------------------------------------
# `version` and each entry of `suffixes_dir` are substituted into
# `filename_pattern` to name the input directories that get scanned.
version = 'R3v5'
filename_pattern = 'statcsv_ossvm_{}_{}'

# Directory that receives the merged CSV files, and the prefix used for
# their file names.
output_dir = 'CSV_files'
datatype = 'normal'

# Input directory suffixes belonging to the 'normal' data type.
suffixes_dir_normal = [
    'normal',
    'normal_max6',
    'normal_max9',
    'onlyCIFAR10',
    'onlyImageNet',
    'onlyMNIST',
]

# Suffixes actually processed by this run (same list object as above).
suffixes_dir = suffixes_dir_normal
# Raw classifier identifier (as found in the input CSVs) -> short label used
# in the merged output.  Every label ends in 'C' or 'O'; that trailing letter
# is what select_results() filters on.
# NOTE(review): the letters presumably denote closed-set / open-set grid
# search (matching the 'gs?c' / 'gs?o' key suffixes) -- confirm.
classifier_map = {
    'mcevm_ovx_gsec': 'evmC',
    'mcevm_ovx_gseo': 'evmO',
    'mcocsvm_ova_gsic': 'ocsvmC',
    'mcocsvm_ova_gsio': 'ocsvmO',
    'mcossvm_ova_gsic': 'ssvmC',
    'mcossvm_ova_gsio': 'ssvmO',
    'mcsvdd_ova_gsic': 'svddC',
    'mcsvdd_ova_gsio': 'svddO',
    'mcsvm_ova_gsic_highGamma_fixedC': 'svmC',
    'mcsvm_ova_gsio_highGamma_fixedC': 'svmO',
    'mcsvmdbc_ova_gsic': 'svmdbcC',
    'mcsvmdbc_ova_gsio': 'svmdbcO',
    'pisvm_ovx_gsec': 'pisvmC',
    'pisvm_ovx_gseo': 'pisvmO',
    'svm1vsll_ovx_gsec': 'onevsetC',
    'svm1vsll_ovx_gseo': 'onevsetO',
    'wsvm_ovx_gsec': 'wsvmC',
    'wsvm_ovx_gseo': 'wsvmO',
}
# Measure token embedded in an input file name -> label used for that measure
# in the output file name (looked up by get_measure()).
measure_map = {
    'na': 'NA',
    'harmonicNA': 'HNA',
    'aks': 'AKS',
    'aus': 'AUS',
    'bbmafm': 'OSFMM',
    'bbmifm': 'OSFMm',
    'mafm': 'FMM',
    'mifm': 'FMm',
}
# Raw dataset identifier (as found in the input CSVs) -> upper-case label
# written to the 'dataset' column of the merged output.
dataset_map = {
    'yeast_pmlb': 'YEAST',
    'mfeat-zernike_pmlb': 'ZERNIKE',
    'mfeat-fourier_pmlb': 'FOURIER',
    'optdigits_pmlb': 'OPTDIGITS',
    'movement_libras_pmlb': 'MOVEMENT',
    'led7_pmlb': 'LED7',
    'led24_pmlb': 'LED24',
    'caltech256_bow_dense_hard_average_1000': 'CALTECH256',
    'vowel_pmlb': 'VOWEL',
    'mfeat-morphological_pmlb': 'MFEAT',
    '15scenes_bow_soft_max_1000': '15SCENES',
    'ImageNet_googlenet_openset_images_07102017': 'IMAGENET',
    'auslan': 'AUSLAN',
    'pendigits_pmlb': 'PENDIGITS',
    'kddcup_limited': 'KDDCUP',
    'krkopt_pmlb': 'KRKOPT',
    'aloi_bic': 'ALOI',
    'letter': 'LETTER',
    'cifar10_local4_train_eval': 'CIFAR10',
    'mfeat-factors_pmlb': 'FACTORS',
    'mfeat-karhunen_pmlb': 'KARHUNEN',
    'mnist_h_fc1_train': 'MNIST',
}
# Only input files whose name ends with this suffix are picked up by get_dir().
suffix_file = "___0___0___0.csv"
def get_dir(suffix_dir):
    """List the result files found in one input directory.

    The directory name is built by formatting the module-level
    ``filename_pattern`` with ``version`` and *suffix_dir*.  Only entries
    whose name ends with ``suffix_file`` are kept.

    Returns a list of paths, each joined with the directory name, in the
    order reported by ``os.listdir``.  Errors from ``os.listdir`` (e.g. a
    missing directory) propagate to the caller.
    """
    dirname = filename_pattern.format(version, suffix_dir)
    return [
        os.path.join(dirname, entry)
        for entry in os.listdir(dirname)
        if entry.endswith(suffix_file)
    ]
# Flat list of every matching result file across all configured directories.
# A comprehension is used instead of reduce(op.concat, ...) so that an empty
# `suffixes_dir` yields an empty list rather than a TypeError, and so the
# statement does not depend on the Python-2-only `reduce` builtin.
files = [path for suffix_dir in suffixes_dir for path in get_dir(suffix_dir)]
def get_measure(filename):
    """Return the output label of the measure encoded in a result file name.

    The base name of *filename* is split on ``'___'`` and its second field
    is looked up in ``measure_map``.  Using ``os.path.basename`` (rather
    than taking the second '/'-separated component) keeps this working for
    bare file names, nested directories and platform-specific separators.

    Raises IndexError if the base name contains no ``'___'`` separator and
    KeyError if the measure token is not present in ``measure_map``.
    """
    basename = os.path.basename(filename)
    measure_token = basename.split('___')[1]
    return measure_map[measure_token]
# Distinct measure labels present among the discovered files; one merged CSV
# is produced per label.
measures = {get_measure(path) for path in files}
def filter_measure(files, measure):
    """Return the entries of *files* whose measure label equals *measure*.

    The label of each path is obtained with ``get_measure``; input order is
    preserved and a new list is returned.
    """
    matching = []
    for path in files:
        if get_measure(path) == measure:
            matching.append(path)
    return matching
def concat_csvs(files):
    """Read every CSV in *files* and stack them into a single DataFrame.

    Each file is parsed with ``pandas.read_csv`` and the frames are
    concatenated in the given order.  The per-file row index is kept as-is,
    so index values repeat across files.
    """
    frames = [pd.read_csv(path) for path in files]
    return pd.concat(frames)
def select_results(df, gs='O'):
    """Keep only the rows whose classifier label ends with *gs*.

    df -- pandas DataFrame with a ``classifier`` column of strings.
    gs -- one of 'C', 'O' or ''.  The empty string matches every label,
          so it keeps all rows.

    Returns the filtered DataFrame (all columns preserved).
    Raises AssertionError if *gs* is not one of the accepted values.
    """
    assert gs in ('C', 'O', ''), 'gs must be one of C, O or the empty string'
    # Build an explicit boolean array: a lazy map() object is not a valid
    # mask on Python 3, and an empty *list* would be read as "select no
    # columns" instead of "select no rows".
    mask = np.array([name.endswith(gs) for name in df.classifier], dtype=bool)
    return df[mask]
def mean_experiments(df):
    """Collapse the individual experiments of each configuration into one row.

    Rows are grouped by ``lenacs``, ``dataset`` and ``classifier``.  In the
    result, ``experiment`` holds the number of rows in the group and
    ``result`` holds the mean of the group's results.  The grouping keys
    are returned as ordinary columns.
    """
    # The keys must be a list: pandas treats a tuple as one single key.
    group_keys = ['lenacs', 'dataset', 'classifier']
    aggregated = df.groupby(group_keys).agg({'experiment': 'size', 'result': 'mean'})
    return aggregated.reset_index()
# Classifier-label suffixes to export.  Each value is handed to
# select_results(); '' matches every label, so nothing is filtered out.
# Iterate over ['C', 'O', ''] instead to also write one file per suffix.
for gs in ['']:
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Files for a specific suffix get a '_<gs>' tag; the unfiltered export
    # gets none.
    gs_tag = '_{}'.format(gs) if gs else gs

    for measure in measures:
        print('Generating CSV for {} measure...'.format(measure))

        df = concat_csvs(filter_measure(files, measure))

        # Replace raw identifiers with their short labels; a name missing
        # from the lookup tables raises KeyError.
        df.classifier = [classifier_map[name] for name in df.classifier]
        df.dataset = [dataset_map[name] for name in df.dataset]

        # Per-experiment rows are kept as they are; averaging them with
        # mean_experiments() is currently disabled.
        df = select_results(df, gs).sort_values(
            ['classifier', 'dataset', 'lenacs', 'experiment'])

        output = os.path.join(
            output_dir, '{}_{}{}.csv'.format(datatype, measure, gs_tag))
        with open(output, 'w') as fd:
            # Write the CSV text verbatim, with no extra trailing newline.
            fd.write(df.to_csv(index=False))