# Provenance: reconstructed from a git patch whose line breaks had been lost.
#   From b3c92fbb6a0511d507e47a472a2c5c23c89ae69d Mon Sep 17 00:00:00 2001
#   From: jsidhom1
#   Date: Tue, 8 Dec 2020 15:55:09 -0500
#   Subject: [PATCH] feature_updates
#   New file: ancillary_analysis/supervised/HIV_analysis/reference.py
#   (index 0000000..7b68151, 44 insertions)
"""Compare predictions for reference TCR/peptide pairs against the full screen.

Reads the pickled screen results and the reference pair table, writes the
per-pair mean prediction to ``val_preds.csv``, saves a violin/swarm figure to
``val_dist.png`` and runs a Fisher exact test on a 2x2 table built from a
prediction threshold.
"""
import pickle

import numpy as np
import pandas as pd
from scipy.stats import fisher_exact

SCREEN_PICKLE = 'screen.pkl'
REFERENCE_CSV = 'hiv_ref_pw.csv'
PREDS_CSV = 'val_preds.csv'
FIGURE_PATH = 'val_dist.png'

THRESH = 0.95
# NOTE(review): these two counts were hard-coded literals in the original
# script. They presumably are the numbers of reference pairs scoring below /
# at-or-above THRESH -- confirm against val_preds.csv before reusing them
# with different data.
N_VALIDATED_BELOW = 1
N_VALIDATED_ABOVE = 17


def mean_prediction(df_auc, sequences, predicted, pep, tcr):
    """Return the mean prediction of ``tcr`` within the screen entry for ``pep``.

    Parameters
    ----------
    df_auc : mapping with an ``'epitope'`` column; the position of the first
        entry equal to ``pep`` selects the screen entry.
    sequences, predicted : indexable collections aligned with ``df_auc``;
        ``sequences[u]`` holds the sequences and ``predicted[u]`` the
        matching prediction values of entry ``u``.
    pep : peptide to look up in ``df_auc['epitope']``.
    tcr : sequence to look up in ``sequences[u]``.

    Returns
    -------
    float
        Mean of the predictions at every position where ``sequences[u]``
        equals ``tcr``; ``nan`` when the sequence does not occur.

    Raises
    ------
    IndexError
        If ``pep`` is not present in ``df_auc['epitope']``.
    """
    epitope_hits = np.where(np.asarray(df_auc['epitope']) == pep)[0]
    if epitope_hits.size == 0:
        raise IndexError('peptide {!r} not found in df_auc'.format(pep))
    u = epitope_hits[0]

    seq_hits = np.where(np.isin(sequences[u], tcr))[0]
    scores = np.asarray(predicted[u])[seq_hits]
    if scores.size == 0:
        # np.mean([]) would also give nan, but with a RuntimeWarning.
        return float('nan')
    return float(np.mean(scores))


def validated_predictions(pw, df_auc, sequences, predicted):
    """Return one mean prediction per row of ``pw`` (columns TCR, Peptide)."""
    return [
        mean_prediction(df_auc, sequences, predicted, pep, tcr)
        for tcr, pep in zip(pw['TCR'], pw['Peptide'])
    ]


def plot_distribution(preds, all_pred, out_path=FIGURE_PATH):
    """Draw the background violin with the reference pairs overlaid in red.

    The figure is written to ``out_path`` *before* it is shown: with an
    interactive backend ``plt.show()`` blocks and the figure is released
    when its window closes, so saving afterwards writes an empty canvas.
    """
    # Imported here so the numeric helpers above stay importable on machines
    # without a plotting stack.
    import matplotlib.pyplot as plt
    import seaborn as sns

    df_plot = pd.DataFrame()
    df_plot['preds'] = np.hstack([preds, all_pred])
    df_plot['label'] = np.hstack([['validated+'] * len(preds),
                                  ['background'] * len(all_pred)])

    background = df_plot[df_plot['label'] == 'background']
    validated = df_plot[df_plot['label'] == 'validated+']
    sns.violinplot(data=background, x='label', y='preds', cut=0)
    sns.swarmplot(data=validated, x='label', y='preds',
                  color='red', size=8, alpha=0.75)
    plt.xlabel('')
    plt.ylabel('Prediction Value', fontsize=16)
    plt.xticks([])
    plt.savefig(out_path)
    plt.show()


def enrichment_stats(all_pred, thresh=THRESH,
                     n_val_below=N_VALIDATED_BELOW,
                     n_val_above=N_VALIDATED_ABOVE):
    """Fisher exact test and enrichment for the thresholded predictions.

    The 2x2 table is::

        [[#(all_pred <  thresh) - n_val_below, #(all_pred >= thresh) - n_val_above],
         [n_val_below,                          n_val_above]]

    Returns
    -------
    (table, p_val, enrichment)
        ``table`` is the 2x2 integer array, ``p_val`` the p-value from
        ``scipy.stats.fisher_exact`` and ``enrichment`` the fraction of the
        second column that lies in the second row, divided by the fraction
        of all counts that lie in the second row.

    Raises
    ------
    ValueError
        If subtracting the given counts makes a table entry negative.
    """
    all_pred = np.asarray(all_pred)
    table = np.array([
        [np.sum(all_pred < thresh) - n_val_below,
         np.sum(all_pred >= thresh) - n_val_above],
        [n_val_below, n_val_above],
    ])
    if np.any(table < 0):
        raise ValueError(
            'contingency table has negative entries: {}'.format(table.tolist()))

    _, p_val = fisher_exact(table)
    second_row_share_of_col = table[1, 1] / np.sum(table[:, 1])
    second_row_share_of_all = np.sum(table[1, :]) / np.sum(table)
    return table, p_val, second_row_share_of_col / second_row_share_of_all


if __name__ == '__main__':
    # Kept at module level (not inside a main()) so the results remain
    # inspectable as globals when the script is run interactively.

    # SECURITY: pickle.load executes arbitrary code from the file; only run
    # this on a screen.pkl produced by a trusted pipeline.
    with open(SCREEN_PICKLE, 'rb') as f:
        df_auc, sequences, predicted = pickle.load(f)

    pw = pd.read_csv(REFERENCE_CSV)
    preds = validated_predictions(pw, df_auc, sequences, predicted)

    pw['preds'] = preds
    pw['preds'] = pw['preds'].round(3)
    pw.to_csv(PREDS_CSV, index=False)

    all_pred = np.hstack(predicted)
    plot_distribution(preds, all_pred)

    thresh = THRESH
    x, p_val, enrichment = enrichment_stats(all_pred, thresh)
    # The original computed these and never reported them.
    print('Fisher exact p-value: {}'.format(p_val))
    print('Enrichment: {}'.format(enrichment))