#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 15 16:31:45 2020
@author: Yesh
"""
import argparse
import os
import sys
import time

# os.chdir('/Users/Yesh/Documents/BDRAD/chest_ct_projects/lung_nodule_slice_localizer')
import numpy as np
import pandas as pd
from stl import mesh
from torchvision import transforms

# dataloader_NLST lives in the sibling pytorch-retinanet checkout
sys.path.append('../pytorch-retinanet')
from dataloader_NLST import NLSTDataset, Normalizer, Resizer

np.random.seed(42)


def get_inferences(datadir):
    """Concatenate every inference CSV in `datadir` into a single DataFrame."""
    csvs = [f for f in os.listdir(datadir) if f.endswith('.csv')]
    frames = [pd.read_csv(os.path.join(datadir, csv)) for csv in csvs]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
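
# Usage sketch (comments only, not executed here): main() below calls this on
# the raw-inference directory; the resulting frame is expected to carry at
# least the 'pid', 'study_yr', and 'axial_conversion' columns used later.
#   inferences = get_inferences('../data/NLST_Massive_Annotation_No_Masking_Results_2020_06_07_CoronalAxial')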


def createMeshCube(corner, dx, dy, dz):
    """
    Return a mesh cube with edge lengths dx, dy, dz along the x, y, z axes,
    with respect to an axial slice orientation.
    `corner` is the posterior, inferior, left corner (presumed).
    """
    # the 8 vertices of the cube
    vertices = np.array([
        corner,
        corner + np.array([dx,  0,  0]),
        corner + np.array([dx, dy,  0]),
        corner + np.array([ 0, dy,  0]),
        corner + np.array([ 0,  0, dz]),
        corner + np.array([dx,  0, dz]),
        corner + np.array([dx, dy, dz]),
        corner + np.array([ 0, dy, dz]),
    ])
    # the 12 triangles composing the cube (two per face)
    faces = np.array([
        [0, 3, 1],
        [1, 3, 2],
        [0, 4, 7],
        [0, 7, 3],
        [4, 5, 6],
        [4, 6, 7],
        [5, 1, 2],
        [5, 2, 6],
        [2, 3, 6],
        [3, 7, 6],
        [0, 1, 5],
        [0, 5, 4],
    ])
    # build the mesh: one triangle per face row, vertices looked up by index
    cube = mesh.Mesh(np.zeros(faces.shape[0], dtype=mesh.Mesh.dtype))
    for i, f in enumerate(faces):
        for j in range(3):
            cube.vectors[i][j] = vertices[f[j], :]
    return cube
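
# Example (a sketch, independent of the pipeline): a 10 mm cube centred on the
# origin, saved for inspection in a viewer such as 3D Slicer. numpy-stl's
# mesh.Mesh.save() writes the STL file; the filename is illustrative.
#   cube = createMeshCube(corner=np.array([-5.0, -5.0, -5.0]), dx=10, dy=10, dz=10)
#   cube.save('example_cube.stl')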


def main(args=None):
    parser = argparse.ArgumentParser(description='Identify nodule using axial slice number')
    parser.add_argument('--inferences_dir', help='Path to directory containing NLST raw inferences',
                        default='../data/NLST_Massive_Annotation_No_Masking_Results_2020_06_07_CoronalAxial')
    parser.add_argument('--nodules_filepath', help='Path to file containing NLST nodule candidates',
                        default='../data/NLST_Clustered_Massive_Annotation_No_Masking_Results_2020_06_07_CoronalAxial_fp_reducer/nodules.csv')
    parser.add_argument('--nlst_dir', help='Directory of NLST data',
                        default='/Volumes/Yesh2020/nlst-ct')
    parser.add_argument('--nlst_anns', help='Path to NLST annotations',
                        default='../data/nlst_annotations/all_14_datasets/cdas_delivery_csv/sctabn.csv')
    # parse into a namespace instead of overwriting the parser itself,
    # so the `args` parameter is actually honoured
    args = parser.parse_args(args)

    # read annotations
    anns = pd.read_csv(args.nlst_anns)
    anns['pidyr'] = anns['pid'].astype(str) + '_' + anns['STUDY_YR'].astype(str)
    anns = anns.dropna(subset=['sct_slice_num'])
    anns = anns[anns['pid'] > 100380]  # pids before this were used with the fp_reducer and must be excluded

    # get predicted nodules
    nodules = pd.read_csv(args.nodules_filepath)
    nodules['pidyr'] = nodules['pid'].astype(str) + '_' + nodules['study_yr'].astype(str)
    nodules = nodules.dropna(subset=['confidence_max'])

    # keep only anns with pidyrs that we've clustered (including nodules with no clusters)
    anns = anns[anns['pidyr'].isin(nodules['pidyr'])]

    # get (or create and cache) the test set
    saved_test_pids = 'anns_test_pids.csv'
    if os.path.exists(saved_test_pids):
        anns = pd.read_csv(saved_test_pids)
    else:
        # randomize by pid & only select 1 year per pid
        test_pidyrs = anns.sample(3).groupby('pid').first()['pidyr'].values
        anns = anns[anns['pidyr'].isin(test_pidyrs)]
        assert not os.path.exists(saved_test_pids)
        anns.to_csv(saved_test_pids, index=False)

    # get raw inferences
    inferences = get_inferences(args.inferences_dir)
    inferences['pidyr'] = inferences['pid'].astype(str) + '_' + inferences['study_yr'].astype(str)

    # get the axial conversion value for each pidyr
    inference_grp = inferences.groupby(['pidyr', 'axial_conversion']).size().reset_index().rename(columns={0: 'count'})
    del inference_grp['count']

    # merge in the axial_conversion information
    nodules = nodules.merge(inference_grp, on='pidyr')

    nlst = NLSTDataset(fp_anns=anns,
                       data_dir=args.nlst_dir,
                       transform=transforms.Compose([Normalizer(), Resizer()]))

    df_final_nodules = pd.DataFrame()
    for pidyr in np.unique(anns['pidyr']):
        print(pidyr)
        ann = anns[anns['pidyr'] == pidyr]
        nodule_cands = nodules[nodules['pidyr'] == pidyr]

        # no candidates at all for this pidyr: record the ground truth only
        if len(nodule_cands) == 0:
            df_final_nodules = pd.concat([df_final_nodules,
                                          pd.DataFrame({'pidyr': ann['pidyr'],
                                                        'gt_slice_num': ann['sct_slice_num'],
                                                        'gt_abn_num': ann['SCT_AB_NUM']})],
                                         sort=False)
            continue

        pid = ann['pid'].values[0]
        study_yr = ann['STUDY_YR'].values[0]
        kernel = nodule_cands['kernel'].values[0]

        # load the DICOMs so slice numbers can be mapped to axial world coordinates
        res = nlst.load_NLST_images(pid, study_yr, load_dcms=True)
        dcms = res[kernel]['dcms']
        slice_nums = np.array([int(dcm.InstanceNumber) for dcm in dcms])
        axial_positions = np.array([float(dcm.ImagePositionPatient[2]) for dcm in dcms])

        used_nodules_clusters = []
        cubes = []
        df_pidyr_nodules = pd.DataFrame()
        for idx, row in ann.iterrows():
            # drop already identified nodules in this pidyr (copy, so the
            # dist_to_label column below is not written into a view)
            nods = nodule_cands[~nodule_cands['cluster'].isin(used_nodules_clusters)].copy()

            # get the axial world position for the GT slice
            slice_num_gt = int(row['sct_slice_num'])
            gt_abn_num = row['SCT_AB_NUM']
            try:
                axial_gt = axial_positions[np.where(slice_nums == slice_num_gt)[0]][0]
            except IndexError as e:
                print(e)
                df_pidyr_nodules = pd.concat([df_pidyr_nodules,
                                              pd.DataFrame({'pidyr': [pidyr],
                                                            'gt_slice_num': [slice_num_gt],
                                                            'gt_axial_world_coord': [np.nan],
                                                            'comment': ['IndexError when trying to get gt_axial_world_coord. Check sct_slice_num manually'],
                                                            'gt_abn_num': gt_abn_num,
                                                            })],
                                             sort=False)
                continue

            # map NLST SCT_EPI_LOC codes to lobe names (None if unrecognised)
            epi_loc_names = {1: 'Right Upper Lobe',
                             2: 'Right Middle Lobe',
                             3: 'Right Lower Lobe',
                             4: 'Left Upper Lobe',
                             5: 'Lingula',
                             6: 'Left Lower Lobe'}
            gt_location = epi_loc_names.get(row['SCT_EPI_LOC'])

            # distance from each candidate to the labelled axial position
            nods.loc[:, 'dist_to_label'] = np.abs(nods['axial_world_coord_mean'] - axial_gt)
            # candidates must be within 20mm of the label
            nods = nods[nods['dist_to_label'] <= 20]
            # keep the single closest candidate; change 1 to N to keep the N closest
            nods = nods[nods['dist_to_label'].isin(nods['dist_to_label'].nsmallest(1))]
            nods = nods.sort_values(by='dist_to_label')

            # no candidate close enough: record the ground truth only
            if len(nods) == 0:
                df_pidyr_nodules = pd.concat([df_pidyr_nodules,
                                              pd.DataFrame({'pidyr': [pidyr],
                                                            'gt_slice_num': [slice_num_gt],
                                                            'gt_axial_world_coord': [axial_gt],
                                                            'gt_location': [gt_location],
                                                            'gt_abn_num': gt_abn_num,
                                                            })],
                                             sort=False)
                continue

            nods['gt_slice_num'] = slice_num_gt
            nods['gt_axial_world_coord'] = axial_gt
            nods['gt_location'] = gt_location
            nods['gt_abn_num'] = gt_abn_num
            df_pidyr_nodules = pd.concat([df_pidyr_nodules, nods], sort=False)
            used_nodules_clusters += [int(cluster) for cluster in list(nods['cluster'])]

            # create mesh cubes for visualization in Slicer - loop because
            # predicting the "top N" can leave several rows in nods
            for n, nod in nods.iterrows():
                dx, dy, dz = nod['diameter_max'], nod['diameter_max'], nod['diameter_max']
                # float array: an integer array here would silently truncate
                # the world coordinates
                corner = np.zeros(3)
                corner[0] = nod['saggital_world_coord_mean'] - dx / 2
                corner[1] = nod['coronal_world_coord_mean'] - dy / 2
                corner[2] = nod['axial_world_coord_mean'] - dz / 2
                cube = createMeshCube(corner, dx, dy, dz)
                cubes.append(cube)

        # save the cubes, grouped into folders with the same name as the csv
        cube_dir = 'Nodule_Masks'
        if not os.path.exists(cube_dir):
            os.mkdir(cube_dir)
        if len(cubes) != 0:
            cubes = mesh.Mesh(np.concatenate([c.data for c in cubes]))
            cubes.save(os.path.join(cube_dir, '{}_{}_{}.stl'.format(pid, study_yr, kernel)))

        # add this pidyr's matches to the final df and checkpoint it to disk
        df_final_nodules = pd.concat([df_final_nodules, df_pidyr_nodules], sort=False)
        df_final_nodules.to_csv(os.path.join(cube_dir, 'nodules_test_set.csv'), index=False)


if __name__ == '__main__':
    time_total0 = time.time()
    main()
    print('-' * 30)
    print('Total Time: {} min'.format(round((time.time() - time_total0) / 60, 2)))
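
# Example invocation (a sketch: the flags shown are just the argparse defaults
# above and will need to point at a real NLST checkout):
#   python localize_with_slicenum.py \
#       --nlst_dir /Volumes/Yesh2020/nlst-ct \
#       --nlst_anns ../data/nlst_annotations/all_14_datasets/cdas_delivery_csv/sctabn.csv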