-
Notifications
You must be signed in to change notification settings - Fork 2
/
connectivity_matrix.py
92 lines (79 loc) · 3.93 KB
/
connectivity_matrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import argparse
import sys
import numpy as np
import matplotlib.pyplot as plt
import os
# Build a connectivity matrix for each run.
def buildMatrix(matrixFile, outputDir=None, manifest=None):
    """Build, save and return the sample-by-sample connectivity matrix.

    The input is a tab-delimited "H matrix" with one row per cluster
    (metagene) and one column per sample.  Each sample is assigned to the
    cluster (row) holding its largest value; ties go to the lowest row
    index.  Two samples are connected (1) when they are assigned to the
    same cluster, otherwise 0.

    Side effects:
      * appends the path of the output file to ``manifest``;
      * writes the matrix, tab-delimited, to
        ``<outputDir>final_connectivity_matrix<basename of matrixFile>``.

    Args:
        matrixFile: path to the tab-delimited H matrix file.
        outputDir: directory prefix for the output file; it is concatenated
            as-is, so it must end with a path separator.  When omitted, the
            module-level ``matrixPath`` is used.
        manifest: writable text file object that collects the paths of the
            generated matrices.  When omitted, the module-level
            ``matricesForConsensus`` is used.

    Returns:
        A square integer ``numpy.ndarray`` of 0/1 values with one row and
        one column per sample.

    Raises:
        ValueError: if the file holds no data, a field is not a number, or
            the rows do not all have the same number of columns.
    """
    # Fall back to the globals that the script entry point sets up, so the
    # original single-argument call keeps working.
    if outputDir is None:
        outputDir = matrixPath
    if manifest is None:
        manifest = matricesForConsensus

    rows = []
    with open(matrixFile) as handle:
        for line in handle:
            line = line.rstrip('\n')
            # A trailing blank line would otherwise crash float('').
            if not line.strip():
                continue
            rows.append([float(field) for field in line.split('\t')])

    inputH = np.array(rows, dtype=float)
    if inputH.ndim != 2 or inputH.size == 0:
        raise ValueError('no H matrix data found in ' + str(matrixFile))

    outputFile = (str(outputDir) + 'final_connectivity_matrix'
                  + str(matrixFile).split("/")[-1])
    manifest.write('\n' + outputFile)

    # Cluster assignment per sample, computed once instead of once per pair.
    assignedCluster = np.argmax(inputH, axis=0)
    # Broadcasting compares every sample with every other sample.
    # Builtin ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
    connectivityMat = (
        assignedCluster[:, np.newaxis] == assignedCluster[np.newaxis, :]
    ).astype(int)

    # Output the connectivity matrix in tab-delimited format.
    with open(outputFile, 'w') as outputMatrix:
        for row in connectivityMat:
            outputMatrix.write('\t'.join(str(value) for value in row) + '\n')
    return connectivityMat
def visualize_connectivity(connectivityMat, sampleName):
    """Draw the connectivity matrix as a heatmap and save it as a PNG.

    The image is written to
    ``<visPath>final_connectivity_matrix<basename of args.matrixFile>.png``,
    where ``visPath`` and ``args`` are the module-level values set up by the
    script entry point.

    Args:
        connectivityMat: 2-D array to plot (the matrix returned by
            ``buildMatrix``).
        sampleName: path to a text file with one sample name per line, used
            as tick labels on both axes; or the sentinel string
            ``'noXLabels'`` to leave the axes with default numeric ticks.
    """
    # Read the labels first so a bad path fails before any figure exists.
    colNames = None
    if sampleName != 'noXLabels':
        with open(sampleName) as handle:
            colNames = [line.rstrip('\n') for line in handle]

    fig1, ax1 = plt.subplots()
    try:
        heatmap = ax1.pcolor(connectivityMat, cmap=plt.cm.jet)
        plt.colorbar(heatmap)
        # If column names were provided by the user, the heatmap is labeled.
        if colNames is not None:
            # +0.5 centres each tick on its cell.
            ax1.set_yticks(np.arange(connectivityMat.shape[0]) + 0.5, minor=False)
            ax1.set_yticklabels(colNames, minor=False)
            ax1.set_xticks(np.arange(connectivityMat.shape[1]) + 0.5, minor=False)
            ax1.set_xticklabels(colNames, minor=False)
        plt.ylabel('sample ID')
        plt.xlabel('sample ID')
        plt.savefig(str(visPath) + 'final_connectivity_matrix'
                    + str(args.matrixFile.split("/")[-1]) + '.png')
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig1)
if __name__ == '__main__':
    # The text is passed as ``description``; positionally it would become
    # the program name shown in the usage line.
    parser = argparse.ArgumentParser(
        description="parses information to build connectivity matrix")
    parser.add_argument('-input', required=True, dest='matrixFile',
                        help='Full path to tab-delimited "H matrix" file')
    parser.add_argument('--colNames', default='noXLabels', dest='colNames', type=str,
                        help='full path to file of sample names in order of matrix, one name per line')
    parser.add_argument('--output', default=os.getcwd(), dest='outPath', type=str,
                        help='full path to output directory')
    args = parser.parse_args()

    # Paths to the output directories.  os.path.join inserts the separator
    # that plain concatenation dropped whenever --output (or the default
    # working directory) had no trailing slash; the final '' keeps a
    # trailing separator because the functions above append file names
    # directly to these prefixes.
    visPath = os.path.join(str(args.outPath), 'connectivity_visualization', '')
    matrixPath = os.path.join(str(args.outPath), 'connectivity_matrix', '')

    # Create the output directories (and any missing parents) if needed.
    if not os.path.isdir(visPath):
        os.makedirs(visPath)
    if not os.path.isdir(matrixPath):
        os.makedirs(matrixPath)

    # List of paths to connectivity matrices for consensus matrix creation;
    # used by run_NMF.sh.  Opened in append mode so repeated runs accumulate.
    manifestPath = matrixPath + 'paths_to_connectivity_matrices_to_analyze.txt'
    with open(manifestPath, 'a') as matricesForConsensus:
        # Write the header only when the file was just created / is empty.
        if os.stat(manifestPath).st_size == 0:
            matricesForConsensus.write('Connectivity matrices contributing to consensus matrix')
        # Input is a file of predicted matrix H
        # (k x m, k = clusters/metagene expression, m = samples).
        connectivityMat = buildMatrix(matrixFile=args.matrixFile)

    visualize_connectivity(connectivityMat=connectivityMat, sampleName=args.colNames)