-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvolcano_plot.py
113 lines (65 loc) · 3.5 KB
/
volcano_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#A script to plot volcano plots
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from adjustText import adjust_text
import random
#read the csv file
deseq_results = pd.read_csv('ctnnb_TPMdeseq_analysis.csv').dropna()
deseq_results['nlog10'] = -np.log10(deseq_results.padj)
#Determine the upregulated and downregulated genes based on a specified threshold for log2 fold change and p-adj
upregulated_genes = deseq_results[(deseq_results['log2FoldChange'] > 0.5) & (deseq_results['padj'] < 0.05)].symbol.tolist()
upcount = len(upregulated_genes)
#print("Number of upregulated genes", upcount)
#not_diff = deseq_results[deseq_results['padj'] > 0.05]
downregulated_genes = deseq_results[(deseq_results['log2FoldChange'] < -0.5) & (deseq_results['padj'] < 0.05)].symbol.tolist()
downcount = len(downregulated_genes)
#print("Number of downregulated genes", downcount)
notdiff_genes = deseq_results[((deseq_results['log2FoldChange'] < 0.5) & (deseq_results['log2FoldChange'] > -0.5) & (deseq_results['padj'] < 0.05))|(deseq_results['padj'] > 0.05)].symbol.tolist()
notdiff = len(notdiff_genes)
#print(notdiff)
#print("Number of not differentially expressed genes", notdiff)
#print(upregulated_genes)
def map_color(a):
log2FoldChange, symbol, padj, nlog10 = a
if symbol in notdiff_genes:
return 'Not_diff_expr'+' '+str(notdiff)
elif symbol in upregulated_genes:
return 'upregulated'+' '+str(upcount)
elif symbol in downregulated_genes:
return 'downregulated'+' '+str(downcount)
deseq_results['key'] = deseq_results[['log2FoldChange', 'symbol', 'padj', 'nlog10']].apply(map_color, axis = 1)
#print(deseq_results)
#make the plot
plt.figure(figsize = (6,6))
ax = sns.scatterplot(data = deseq_results, x = 'log2FoldChange', y = 'nlog10',
hue = 'key', hue_order = [ 'upregulated'+' '+str(upcount),'downregulated'+' '+str(downcount), 'Not_diff_expr'+' '+str(notdiff)],
palette = ['red','green', 'grey'])
ax.axhline(1, zorder = 0, c = 'k', lw = 1, ls = '--')
ax.axvline(-1, zorder = 0, c = 'k', lw = 1, ls = '--')
ax.axvline(1, zorder = 0, c = 'k', lw = 1, ls = '--')
#mark selected genes
mark_list=['Cdkn2A','Bst2','Grem1','Bst2','Foxj1','Gabrb3','Cers1']
texts = []
texts = [plt.text(row['log2FoldChange'], row['nlog10'], row['symbol'], fontsize = 12, weight = 'bold') for index, row in deseq_results.iterrows() if row['symbol'] in mark_list]
# Adjust the text
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='k'))
#for index,row in deseq_results.iterrows():
# if row['symbol'] in mark_list:
# plt.annotate(row['symbol'],(row['log2FoldChange'],row['nlog10']), textcoords="offset points", xytext=(5, 5),
# ha='left', bbox=dict(boxstyle='round,pad=0.5', fc='white', alpha=0.5))
plt.legend(loc = 1, bbox_to_anchor = (1.5,1), frameon = True, prop = {'weight':'bold'})
for axis in ['bottom', 'left']:
ax.spines[axis].set_linewidth(2)
for axis in ['top', 'right']:
ax.spines[axis].set_linewidth(2)
#ax.spines['top'].set_visible(True)
#ax.spines['right'].set_visible(False)
ax.tick_params(width = 2)
plt.xticks(size = 12, weight = 'bold')
plt.yticks(size = 12, weight = 'bold')
plt.xlabel("$log_{2}$ fold change", size = 15)
plt.ylabel("-$log_{10}$ FDR", size = 15)
plt.savefig('volcano_ctnnb_labeled_TPM.png', dpi = 300, bbox_inches = 'tight', facecolor = 'white')
plt.show()