-
Notifications
You must be signed in to change notification settings - Fork 0
/
aggregate_prs_statistics_cross_generic.py
65 lines (50 loc) · 2.78 KB
/
aggregate_prs_statistics_cross_generic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
import pandas as pd
import constants
import utils
def _collect_tables(prs_names, imps, hyperparameters, rep, method, run_ids, kind):
    """Read every existing per-run table of one kind and stack them into one frame.

    Looks for files named ``prs.cv.{method}___{run_id}.{kind}.{hp}.tsv`` under
    ``constants.PRSS_PATH/<prs_name>/<imp>/rep_<rep>`` and visits them in
    ``prs_name`` -> ``imp`` -> ``hp`` -> ``run_id`` order.

    Args:
        prs_names: Iterable of PRS directory names (first path component).
        imps: Iterable of sub-directory names under each PRS directory.
        hyperparameters: Iterable of hyperparameter labels used in file names.
        rep: Repetition identifier; selects the ``rep_<rep>`` directory.
        method: Method label embedded in the file names.
        run_ids: Iterable of run identifiers embedded in the file names.
        kind: Table kind embedded in the file names (e.g. ``"statistics"``).

    Returns:
        A DataFrame with all tables that were found, each tagged with ``imp``
        and ``prs_name`` columns and passed through ``utils.fix_table``; an
        empty DataFrame when no file exists.
    """
    frames = []
    for prs_name in prs_names:
        for imp in imps:
            path = os.path.join(constants.PRSS_PATH, prs_name, imp, f"rep_{rep}")
            for hp in hyperparameters:
                for run_id in run_ids:
                    fl_name = os.path.join(path, f"prs.cv.{method}___{run_id}.{kind}.{hp}.tsv")
                    # Not every combination has to be present on disk;
                    # absent files are skipped rather than treated as errors.
                    if not os.path.exists(fl_name):
                        continue
                    df = pd.read_csv(fl_name, sep='\t')
                    # Tag the rows with their origin so they stay
                    # distinguishable after stacking.
                    df['imp'] = imp
                    df['prs_name'] = prs_name
                    frames.append(utils.fix_table(df))

    # pd.concat raises on an empty list, so fall back to an empty frame.
    if not frames:
        return pd.DataFrame()
    # One concat over the collected frames avoids re-copying the accumulated
    # rows for every file read.
    return pd.concat(frames)


def aggregate_statistics_cross(prs_names, imps, method, hyperparameters, cv_folds, rep, suffix):
    """Aggregate per-run statistics and OR-summary tables into four TSV files.

    For every ``prs_name`` / ``imp`` / hyperparameter combination, the
    validation-fold tables (run ids ``1_{cv_folds}_validation`` ..
    ``{cv_folds}_{cv_folds}_validation``) and the test tables (run id
    ``{cv_folds}_test``) are read if present, for both the ``statistics`` and
    ``or.summary`` kinds. The stacked results are written to
    ``constants.OUTPUT_PATH`` as:

    * ``prs.cv.{method}.statistics_summary_{suffix}.tsv``
    * ``prs.cv.{method}.or_summary_{suffix}.tsv``
    * ``prs.cv.{method}.test.statistics_summary_{suffix}.tsv``
    * ``prs.cv.{method}.test.or_summary_{suffix}.tsv``

    The two validation tables are also printed to stdout.

    Args:
        prs_names: Iterable of PRS directory names.
        imps: Iterable of sub-directory names under each PRS directory.
        method: Method label embedded in input and output file names.
        hyperparameters: Iterable of hyperparameter labels used in file names.
        cv_folds: Number of cross-validation folds (int).
        rep: Repetition identifier; selects the ``rep_<rep>`` directory.
        suffix: Suffix appended to the output file names.

    Returns:
        None.
    """
    cv_ids = [f"{fold}_{cv_folds}_validation" for fold in range(1, cv_folds + 1)]
    test_ids = [f"{cv_folds}_test"]

    def collect(run_ids, kind):
        return _collect_tables(prs_names, imps, hyperparameters, rep, method, run_ids, kind)

    df_statistics_all = collect(cv_ids, "statistics")
    df_statistics_test = collect(test_ids, "statistics")
    df_or_all = collect(cv_ids, "or.summary")
    df_or_test = collect(test_ids, "or.summary")

    outputs = (
        (df_statistics_all, f"prs.cv.{method}.statistics_summary_{suffix}.tsv"),
        (df_or_all, f"prs.cv.{method}.or_summary_{suffix}.tsv"),
        (df_statistics_test, f"prs.cv.{method}.test.statistics_summary_{suffix}.tsv"),
        (df_or_test, f"prs.cv.{method}.test.or_summary_{suffix}.tsv"),
    )
    for table, file_name in outputs:
        table.to_csv(os.path.join(constants.OUTPUT_PATH, file_name), sep='\t')

    print(df_statistics_all)
    print(df_or_all)