forked from kdharris101/iss
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathiss.m
664 lines (485 loc) · 24.8 KB
/
iss.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
%% iss: code for processing of in situ sequencing
% Kenneth D. Harris and Xiaoyan Qian
% GPL 3.0 https://www.gnu.org/licenses/gpl-3.0.en.html
%
% to use:
% o = iss; % create structure, default parameters
% % change any parameters you want, and set o.FileN
% o = o.extract_and_filter; % create top-hat filtered tiffs for each tile
% o = o.find_spots; % find spot positions in global coordinates
% o = o.call_spots; % allocate each spot to a gene
% o = o.call_cells; % identify cells
%
% this current file iss.m just contains default parameter values
classdef iss
properties
%% interactive graphics mode. 1 means some, 2 means a lot.
Graphics = 1;
% region to show during cell calling
CellCallShowCenter = [385 1100];
CellCallShowRad = 200;
% example cell to diagnose calling, plus class pair to compare (two
% strings)
ExampleCellCenter = [1670 250];
CellCallDiagnosisPair = [];
%% parameters: file locations
% where the input czi files are kept
InputDirectory;
% where the top-hat image files are kept (one for each tile and round, all channels)
TileDirectory;
% where .mat file outputs and intermediates are kept
OutputDirectory;
% code file for spot calling
CodeFile;
%% parameters: stuff that might vary between experiments
% one more round after the main rounds for Anchor
nExtraRounds = 1;
% For single-gene rounds, a n by 3 structure array of {GeneName, round, channel, threshold}
ExtraCodes = {'Sst', 6, 3, 500; 'Npy', 6, 5, 400};
% BasePair labels
bpLabels = {'A', 'C', 'G', 'T'};
%% parameters: extract_and_filter
% To extract spots, a filter is used with inner radius ExtractR1 within
% which it is positive and outer radius ExtractR2 so the annulus between
% R1 and R2 is negative. Overall sums up to 0. R1 should be the
% approximate radius of spot. R2 should be such that between
% R1 and R2 there is a dark region.
ExtractR1 = 3;
ExtractR2 = 6;
% Each filtered image is multiplied by ExtractScale. This is
% because the image is saved as uint16 so to gain information from
% the decimal points, should multiply image so max pixel number is
% in the 10,000s (less than 65,536). If 'auto', it sets to
% 10,000/max(Tile 1 round 1 colour channel 1).
ExtractScale = 2;
%ExtractScale = 5*10^7;
% TilePixelValueShift is added onto every tile (except DAPI) when it is saved and
% removed from every tile when loaded so we can have negative pixel
% values. Saves as uint16.
TilePixelValueShift = 15000;
%Tophat filtering is done to each tile in the Dapi channel
%with a filter of radius DapiR. If set to 'auto', DapiR =
%round(8/pixelsize). R should be about radius of object of
%interest.
DapiR = 'auto';
%% parameters: registration and alignment
% correlation threshold for image alignment. Can be low since
% chance correls are miniscule. Second value is for shifts of 0
% which can occur spuriously due to hot pixels so need a stronger
% threshold
RegCorrThresh = [.3 .6];
% smoothing before registration (size of disk filter)
RegSmooth = 1;
% minimum size overlap for tile matching (pixels - so side squared)
RegMinSize = 200^2;
% distance scale for point cloud registration (pixels)
PcDist = 3;
%PcIter is the max number of iterations done by the PCR algorithm.
PcIter = 30;
% number of point cloud matches needed to count an overlap
MinPCMatches = 50;
% ToPlot: [r,c,t], plot of round r, colour channel c, tile t
% is shown for debugging
ToPlot;
% InitialShiftChannel is the chosen channel to use to register all
% rounds to the anchor. Choose the one with clear spots, recommend 5,6 or 7.
InitialShiftChannel = 4;
% MaxRoundShift is the maximum absolute shift in either direction
% of a tile in any round relative to the anchor round.
MaxRoundShift = 500;
% Registration is forced to find an overlap smaller than MaxOverlapFract*TileSz
% between neighbouring Tiles
MaxOverlapFract = 0.2;
% MaxRegShift is the maximum shift of a tile in the non overlapping
% direction i.e. South/North if looking at its east neighbour.
MaxRegShift = 50;
%Below are required if using register2.m and find_spots2.m - they
%are to do with the initial search.
%The Score used to find the best shift in get_initial_shift2 is
%sum(exp(-Dist.^2/(2*o.ShiftScoreThresh^2)))
ShiftScoreThresh = 2;
% If either RegMinScore or FindSpotsMinScore are set to 'auto',
% the minimum allowed score before the search range is enlarged
% will be set to median + InitalShiftAutoMinScoreParam*IQR
% of search scores.
InitalShiftAutoMinScoreParam = 5;
%RegSearch.Direction.Y,RegSearch.Direction.X are the ranges values of
%shifts to check during the registration to the neighbour in the corresponding
%Direction (South or East)
RegSearch;
%RegStep specifies the step size to use in the Y,X direction of RegSearch
RegStep = [5,5];
%if the score mentioned above is below RegMinScore, the search
%range will be enlarged. If set to 'auto', will be set to median + 5*IQR
%of search scores.
RegMinScore = 'auto';
%RegWidenSearch specifies how much to widen the search range in the
%Y,X directions respectively if the score is below MinRegScore.
%The search will go from min(RegSearch):RegStep:max(RegSearch) to
%min(RegSearch)-RegWidenSearch:RegStep:max(RegSearch)+RegWidenSearch
RegWidenSearch = [50,50];
%After the initial broad search, get_initial_shift will do a
%refined search with a range of
%BestShift-RegRefinedSearch:BestShift+RegRefinedSearch inclusive.
RegRefinedSearch = [12,12];
%RegisterRefinedStep is the step size to use in this refined
%search
RegisterRefinedStep = [3,3];
%if the score is below RegAbsoluteMinScore, the shift found will be
%set to the average of all the other shifts
RegAbsoluteMinScore = 4;
%To be considered an outlier, a shift must have a score less than
%OutlierMinScore. AmendShifts will not run unless atleast one of
%the shifts has a score less than this.
OutlierMinScore = 200;
%OutlierThresh is the number of scaled MAD away from the median
%that constitutes an outlier when considering the shifts in the
%registration or find_steps methods. These outliers are set to the
%average of all acceptable shifts if the score is low enough
OutlierThresh = 5;
%% parameters: spot detection
% smooth images before reading out fluorescence and before registration
% with a disk of this radius. Set to 0 to do no smoothing
SmoothSize = 0;
% to detect spot, pixel needs to be above dilation with this radius
DetectionRadius = 1;
% and pixel raw fluorescence needs to be above this value:
DetectionThresh = 'auto';
% AutoThresh(t,c,r) is the threshold found automatically for tile
% t, color channel c, round r. Its value is
% AutoThreshMultiplier*Median(abs(ScaledFilteredTile)).
AutoThresh;
AutoThreshMultiplier = 10;
%If find less than o.minPeaks spots then DectionThresh is lowered by
%o.ThreshParam
minPeaks = 1000;
ThreshParam = 5;
% find isolated spots by annular filtering with these radii
IsolationRadius1 = 2;
IsolationRadius2 = 7;
% annular filtered value needs to be less than this:
IsolationThresh = 'auto';
% for visualization during spot detection
FindSpotsRoi = [1742 1755 213 227];
% Each spot will be allocated to home tile if possible - but not if
% it is this close to the edge, because of aberrations
ExpectedAberration = 3;
%MinThresh is the smallest value DectionThresh can get to
MinThresh = 10;
%MinSpots is the smallest number of spots on a round/tile for a
%particular colour channel for hat color channel to be deemed
%suitable for finding the initial shifts to the anchor round. If
%all color channels have a tile with less spots than this, an error
%is thrown
MinSpots = 100;
%FindSpotsSearch.Y,FindSpotsSearch.X are the ranges values of
%shifts to check when looking for the initial shifts between rounds
%for each tile. Can also set to cell(o.nRounds,1) and give a
%different search range for each round.
FindSpotsSearch;
%FindSpotsStep specifies the step size to use in the Y,X direction of FindSpotsSearch
FindSpotsStep = [5,5];
%if the score mentioned above is below FindSpotsMinScore, the search
%range will be enlarged. If set to 'auto', will be set to median + 5*IQR
%of search scores.
FindSpotsMinScore = 'auto';
%RegWidenSearch specifies how much to widen the search range in the
%Y,X directions respectively if the score is below MinRegScore.
%The search will go from min(FindSpotsSearch):RegStep:max(FindSpotsSearch) to
%min(FindSpotsSearch)-FindSpotsWidenSearch:FindSpotsStep:max(FindSpotsSearch)+FindSpotsWidenSearch
FindSpotsWidenSearch = [50,50];
%After the initial broad search, get_initial_shift will do a
%refined search with a range of
%BestShift-FindSpotsRefinedSearch:BestShift+FindSpotsRefinedSearch
%inclusive.
FindSpotsRefinedSearch = [12,12];
%FindSpotsRefinedStep is the step size to use in this refined
%search
FindSpotsRefinedStep = [3,3];
%if the score is below FindSpotsAbsoluteMinScore, the shift found will be
%set to the average of all the other shifts in a particular round
FindSpotsAbsoluteMinScore = 10;
%% parameters: spot calling
% normalizes spot fluorescence so this percentile = 1
SpotNormPrctile = 98;
% if BleedMatrixType == 'Separate', then a bleed matrix will be
% computed for each round. If BleedMatrixType == 'Single', a single
% bleed matrix will be computed, combining spot colours from all
% rounds.
BleedMatrixType = 'Single';
% This controls how to normalise the spot and bled codes in o.call_spots.
% If CallSpotsCodeNorm == 'Round', each round of code will have
% L2 norm of 1. Otherwise, whole code will have L2 norm of 1.
% Might want to use 'Round', as this means the contribution to each round
% is the same which is what we expect from the UnbledCodes.
CallSpotsCodeNorm = 'WholeCode';
% score and intensity thresholds to plot a spot (combi codes)
%Note max score is 1 for CallSpotsCodeNorm = 'WholeCode' but 7 for CallSpotsCodeNorm = 'Round'
CombiQualThresh = .8;
CombiIntensityThresh = .1;
CombiDevThresh = 0.07;
CombiAnchorsReq = 4; % need at least this many anchor chans above threshold
nRedundantRounds = 0;
RedundantPseudobases = {'AC', 'GT'};
% RedundantCodes = {...
% '.11..', '.12..', '.22..', '.21..';...
% '..11.', '..12.', '..22.', '..21.';...
% '1..1.', '2..1.', '2..2.', '1..2.';...
% '11...', '12...', '22...', '21...';...
% };
RedundantCodes = {...
'.11..', '.12..', '.21..', '.22..';...
'..11.', '..12.', '..21.', '..22.';...
'1..1.', '2..1.', '1..2.', '2..2.';...
'11...', '12...', '21...', '22...';...
};
%% parameters: cell segmentation
% percentile threshold for dapi image (after imadjust)
DapiThresh = 90;
% how close local maxima can be together (in pixels)
DapiMinSep = 7;
% how big cell radius needs to be to be detected (in pixels)
DapiMinSize = 5;
% how much margin around each cell to assign to it (in pixels)
DapiMargin = 10;
% minimum area in pixels for a cell not to be dropped
MinCellArea = 200;
%% parameters: cell calling
% CellCallRegionXY(:,1:2): XY coords of polygod outlining region to
% look for cells in
CellCallRegionYX;
% how many neighboring cells to consider for each spot
nNeighbors = 3;
% gamma dist prior for each spot
rSpot = 2;
% gamma dist shape for scaling whole gene
rGene = 20;
% probability of misread per pixel
MisreadDensity = 1e-5;
% prior on how much to scale scRNAseq down by to match iss
Inefficiency = .2;
% how much to add to expression so as not to get infities
SpotReg = .1;
% additional log likelihood for a gene inside the segmented region
InsideCellBonus = 2;
% converges when no probabilities have changed more than this
CellCallTolerance = .02;
% converges when no probabilities have changed more than this
CellCallMaxIter = 100;
% for pie-plots: don't both showing probs below this
MinPieProb = .1;
% size of pies
PieSize = 25;
% for plotting: any cell classes starting with these names will be
% collapsed and given the specified color
ClassCollapse = {{'Astro', 'Endo', 'Oligo', 'Eryth', 'Vsmc'}, 'NonNeuron', [.5 .5 .5] ; ...
{'PC.CA1'}, 'PC CA1', [1 .8 .8] ; ...
{'PC.CA2', 'PC.CA3'}, 'PC subtypes', [.8 1 .8] ; ...
% {'PC.CA3'}, 'PC CA3', [.8 1 .8] ; ...
{'Zero'}, 'Zero', [0 0 0]};
%% parameters: stuff that should be the same between experiments
% for dapi images: scale is downsampling factor for final .fig file
DapiChannel = 1;
% which channel of each file is anchor images
AnchorChannel = 2;
% which channel has first round of sequencing images
FirstBaseChannel = 3;
% which sequencing round to align all others to
ReferenceRound = 2;
% how many combinatorial sequencing rounds excluding anchor round
nRounds = 5;
% Number of possible basepairs (always 4 for life as we know it but
% hardcoding is bad programming style!)
nBP = 4;
% tile size (assumed square tiles)
TileSz = 2048;
% expected step size (in coordinates returned by bioformats)
MicroscopeStepSize = 2048;
RawFileExtension = '.czi';
% when decoding spots, will use all colour channels in
% UseChannels (Array of numbers in range 1 to o.nBP)
UseChannels;
% when decoding spots, will use all rounds in
% UseRounds (Array of numbers in range 1 to o.nRounds)
UseRounds;
%% variables: filenames
% TileFiles{r, y, x} is full pathname of tile (y,x) on round r
% (mutlicolor tiff file). Empty if no file there
TileFiles;
% EmptyTiles{y,x} is zero if that tile location wasn't scanned
EmptyTiles;
% FileBase{r} is local filename of round r .czi file; other
% extensions added in output files
FileBase;
% BigDapiFile is full path of stitched DAPI image (tiff format)
BigDapiFile;
% CellMapFile is full path of cell map (.mat format)
CellMapFile;
%% variables: registration
% TilePosYX(t,:): grid position of tile t in integers. t is what you
% find in the file name - so it only counts non-empty tiles. This
% is confusing and ought to be refactored. Manana.
TilePosYX;
% TileOriginYX(t,:,r): YX coordinate of origin of tile t on round
% r. To get the global coordinate of a point, add this to the local
% coordinate within the tile (counting from 1)
TileOrigin;
% TileInitialPosYX(t,:): coordinate of tile t in integers. This is
% the derived coordinates, before using the fact we know the answer
% to get TilePosYX.
TileInitialPosYX;
%RawLocalYX{t} stores the YX coordinates of spots found in the
%anchor round of tile t
RawLocalYX;
%RawIsolated{t} labels each spot in the anchor round as isolated or not
RawIsolated;
%RegInfo saves debugging information for the registration section
%h = horizontal, v = vertical
%Score(i) is the score for the Shifts(i) found between tiles given by Pairs(i)
%ChangedSearch is number of times the search range had to be changed.
%Outlier is a shift found that was wrong and subsequently changed
%to the average of all other shifts.
RegInfo;
%AllBaseSpotNo(t,c,r) is the number of spots found on tile t,
%color channel c, round r.
AllBaseSpotNo;
% D0(t,2,r) stores the initial shift to use as a starting point for
% the PCR on round r tile t.
D0;
%FindSpotsInfo saves debugging information for finding the initial
%shifts.
%Score(t,r) is the score for the shift D0(t,r) found for tile t
%between the anchor round and round r.
%ChangedSearch is number of times the search range had to be changed.
%Outlier is a shift found that was wrong and subsequently changed
%to the average of all other shifts.
FindSpotsInfo;
% A(2,2,c): stores the scaling correction for chromatic aberration
% found by point cloud registration for color channel c
A;
% D(t,2,r): stores the final shift found by point cloud registration
% on round r tile t.
D;
% cc(t,1,r) stores the correlation coefficient for the initial
% shift D0(t,:,r) found between tile t round r and the anchor
cc;
% nMatches(t,c,r): stores number of matches found by point cloud
% registration for tile t, color channel c, round r
nMatches;
% error(t,c,r): stores error found by point cloud registration
% for tile t, color channel c, round r
Error;
%% variables: spot calling outputs
% cSpotColors(Spot, Base, Round) contains spot color on each base
% and round. only for combinatorial splots
cSpotColors;
% cSpotIsolated(Spot) is a binary array saying if the spot is well isolated
% again for combinatorial spots only
cSpotIsolated;
% SpotCombi: binary array which is one for combinatorial spots,
% zero for extra spots
SpotCombi;
% SpotGlobalYX(Spot,1:2) contains y,x coordinates of every spot in
% global coordiate system. Both combinatorial and extra spots
SpotGlobalYX;
% SpotCodeNo(Spot): code number for each spot (uint16). Both combinatorial and extra spots
SpotCodeNo;
% SpotScore(Spot): score saying how well the code fits (0...1).
% 1 for extras
SpotScore;
% SpotIntensity(Spot): RMS intensity of the spot. Zero for
% extras!
SpotIntensity;
% SpotScoreDev(s) is the standard deviation of the scores of
% assigning spot s to all genes
SpotScoreDev;
% CharCodes{Code}: text representation of each code in the
% codebook. For extra spots, this just says "EXTRA"
CharCodes;
% GeneNames{Code}: gene name for each code. Note that some genes
% can have multiple entries, because they have multiple codes or
% because they have extra rounds
GeneNames;
% UnbledCodes(nCodes, nBP*nRounds): binary code vectors
UnbledCodes;
% BledCodes(nCodes, nBP*nRounds): code vectors after modeling
% crosstalk
BledCodes;
% Normalised Spot Scores
NormBledCodes;
cNormSpotColors;
% BleedMatrix used to estimate BledCodes
BleedMatrix;
%% variables: cell calling outputs
% pCellClass(cell, class); % prob each cell goes to each class: last class is zero expression
pCellClass;
% ClassNames(class): name of each class, taken from gSet file
ClassNames;
% pSpotCell(spot, cell): sparse array containing prob of each spot
% to belong to each cell. Last cell is background
pSpotCell;
% position of each cell centroid
CellYX;
%% parameters: Probability Method for spot calling
% BleedMatrix used to estimate BledCodes in call_spots_prob. Unnormalised.
pBleedMatrix;
%HistCounts(:,b,r) is the pixel count corresponding to each value
%in HistValues for channel b, round r
HistCounts;
%HistValues: The full range of pixel values;
HistValues;
%alpha is used for regularisation so don't have any bins with -Inf
%log probability.
alpha = 1e-20;
%SymmHistValues is -max(HistValues(HistCount>0)):max(HistValues(HistCount>0)).
%Required as needs to be symmetric for the convolution
SymmHistValues;
%HistProbs(:,b,r) is the probability corresponding to each value
%in SymmHistValues for channel b, round r.
%(HistCounts/nPixels+o.alpha)./(1+nBins*o.alpha);
HistProbs;
% pBledCodes(nCodes, nBP*nRounds): code vectors after modeling
% crosstalk and un-nomalising.
pBledCodes;
%RaylConst is the constant used in the rayleigh distribution for
%the estimated distribution of lambda such that cSpotColors =
%Lambda*pBledCode
RaylConst = 1.0688;
%ExpConst is same as above but exponenital distribution used for
%all rounds/channels that don't appear in CharCode for each gene.
ExpConst = 3.5;
%LambdaDist(:,g,b,r) is the probability distribution of lambda for
%gene g, channel b and round r. Rayleigh if appear in CharCodes,
%Exp otherwise using constants above.
LambdaDist;
%ZeroIndex is the index of the 0 value in
%min(cSpotColors(:)):max(cSpotColors(:)) used for convolutions.
%Needed to find values in lookup table.
ZeroIndex;
%pIntensityThresh is the value pSpotIntensity(s) needs to exceed for spot s
%to count
pIntensityThresh = 100;
%pLogProbThresh is the value pSpotIntensity(s) needs to exceed for spot s
%to count
pLogProbThresh = -600;
%pScoreThresh is the value pSpotScore(s) needs to exceed for spot s
%to count
pScoreThresh = 10;
%A spot must have pSpotScore(s)+pSpotScoreDev(s) > pDevThresh to
%count - avoid spots with similar score to all genes.
pDevThresh = 6;
%% variables: spot calling outputs
%pSpotIntensity is the modified spot intensity given by
%get_spot_intensity.m
pSpotIntensity;
%pLogProb is sum(ln(Prob(b,r))) i.e. log(total probability)
pLogProb;
%pSpotScore is pLogProb -max(pLogProb(SpotCodeNo~=pSpotCodeNo))
pSpotScore;
%pSpotScoreDev(s) is the standard deviation of the log prob of spot s
%for all genes
pSpotScoreDev;
%pSpotCodeNo is the gene found for each spot
pSpotCodeNo;
end
end