forked from kdharris101/iss
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathiss_PixelBased.m
255 lines (200 loc) · 10.3 KB
/
iss_PixelBased.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
%% iss: code for processing of in situ sequencing
% Kenneth D. Harris and Xiaoyan Qian
% GPL 3.0 https://www.gnu.org/licenses/gpl-3.0.en.html
%
% to use:
% o = iss_PixelBased; % create structure, default parameters
% % change any parameters you want, and set o.FileN
% o = o.extract_and_filter; % create top-hat filtered tiffs for each tile
% o = o.find_spots; % find spot positions in global coordinates
% o = o.call_spots; % allocate each spot to a gene
% o = o.call_cells; % identify cells
%
% this current file iss.m just contains default parameter values
classdef iss_PixelBased < iss_Base
properties
%% parameters: Probability Method for spot calling
% BleedMatrix used to estimate BledCodes in call_spots_prob. Unnormalised.
pBleedMatrix;
%alpha is used for regularisation so don't have any bins with -Inf
%log probability.
alpha = 1e-20;
%An exponential curve will be fit where HistCounts falls below this.
SmoothHistCountLimit = 500;
%The HistCount is smoothed according to
%SmoothHistCountLimit*exp(SmoothHistCountParam*x);
%SmoothHistCountParamGuess is the start point to find SmoothHistCountParam.
SmoothHistCountParamGuess = -0.003;
%HistZeroIndex is index where HistValues==0
HistZeroIndex;
%HistProbs(:,b,r) is the probability corresponding to each value
%in HistValues for channel b, round r.
%(HistCounts/nPixels+o.alpha)./(1+nBins*o.alpha);
HistProbs;
% pBledCodes(nCodes, nBP*nRounds): code vectors after modeling
% crosstalk and un-nomalising.
pBledCodes;
%ProbMethod determines Lambda dist, either 1 or 2.
ProbMethod = 1;
%LambdaDist(:,g,b,r) is the probability distribution of lambda for
%gene g, channel b and round r.
%ProbMethod = 1: Gamma in all rounds/channels using GammaShape.
%ProbMethod = 2: Rayleigh if appear in CharCodes using RaylConst,
%Exp otherwise using ExpConst.
LambdaDist;
%BackgroundLambdaDist(:) is the background distribution of lambda
%for all genes, channels and rounds. It is just the LambdaDist
%where the predicted bled code is 1 (i.e. a very small value).
BackgroundLambdaDist;
%GammaShape is the shape parameter used in the gamma distribution for
%the estimated distribution of lambda such that cSpotColors =
%Lambda*pBledCode
GammaShape = 3.0;
%RaylConst is the constant used in the rayleigh distribution for
%the estimated distribution of lambda such that cSpotColors =
%Lambda*pBledCode
%1 means distribution peaks at bled code number.
RaylConst = 1.0;
%ExpConst is same as above but exponenital distribution used for
%all rounds/channels that don't appear in CharCode for each gene.
ExpConst = 3.5;
%ZeroIndex is the index of the 0 value in
%min(cSpotColors(:)):max(cSpotColors(:)) used for convolutions.
%Needed to find values in lookup table.
ZeroIndex;
%BackgroundProb(:,b,r) is the reference probability distribution
%for visualisation -
%ProbMethod=1: probability distribution if the bled code had a value of 1.
%ProbMethod=2: HistProbs(:,b,r).
BackgroundProb;
%ScoreScale is the contribution each round/channel not in Unbled
%code contributes to LogProbOverBackground compared to each
%round/channel in Unbled code.
%0 means only use 7 rounds/channels in unbled code.
%1 means use all 49 rounds/channels equally
%In between uses all rounds/channels but unbled code contributes
%more.
%If ProbMethod=2, this is 1.
ScoreScale = 0;
%If ScoreBledThroughContribution is true, then ScoreScale will be
%set to 0 and e.g. if for round r, gene g, o.BledCodes =
%[0.001, 0.05, 0.4, 0.09, 0.001, 0, 0].
%The contribution of each channel to LogProb for this round will be:
%[0.0018, 0.092, 0.738, 0.166, 0.0018, 0, 0].
%I.e. bleed through contributes but in proportion to its intensity.
ScoreBleedThroughContribution = true;
%pIntensityThresh is the value pSpotIntensity(s) needs to exceed for spot s
%to count
pIntensityThresh = 100;
pIntensityThresh2 = 50;
%pSpotIntensity2 needs to be greater than pIntensity2Thresh
pIntensity2Thresh = 0;
%pLogProbThresh is the value pLogProbOverBackground(s) needs to exceed for spot s
%to count
pLogProbThresh = 0;
pLogProbThresh2 = 0;
%pScoreThresh is the value pSpotScore(s) needs to exceed for spot s
%to count
pScoreThresh = 80;
%If pSpotScore(s) < pScoreThresh but pSpotScore(s) > pScoreThresh2
%and has high intensity then will count as spot.
pScoreThresh2 = 0;
%A spot must have pSpotScore(s)+pSpotScoreDev(s) > pDevThresh to
%count - avoid spots with similar score to all genes.
pDevThresh = 6;
%Values used in quality_threshold for prob method
pQualThresh1 = -90; %Optimized using ground truth
pQualParam1 = 0.4; %Optimized using ground truth
pQualThresh2 = -5; %Optimized using ground truth
pQualParam2 = 3.1; %Optimized using ground truth
pQualThresh3 = 71;
pQualThresh4 = -20;
%% variables: spot calling outputs - prob method
% pSpotGlobalYX(Spot,1:2) contains y,x coordinates of every spot in
% global coordiate system. Both combinatorial and extra spots
% pixel spots are at different locations.
pSpotGlobalYX; %Same as dpSpotGlobalYX
% pSpotColors(Spot, Base, Round) contains spot color on each base
% and round. only for combinatorial splots
pSpotColors; %Same as dpSpotColors
% pSpotIsolated(Spot) is a binary array saying if the spot is well isolated
% again for combinatorial spots only
pSpotIsolated; %Same as dpSpotIsolated
% pLocalTile(s) is the tile spot cSpotColors(s,:,:) was found on
pLocalTile; %Same as dpLocalTile
%pSpotIntensity is the modified spot intensity given by
%get_spot_intensity.m
pSpotIntensity;
%pSpotIntensity2 is the median intensity in the 7 rounds/channels
%specified by the gene assigned i.e. pSpotCodeNo.
pSpotIntensity2;
%pLogProb is sum(ln(Prob(b,r))/ln(HistProb(SpotColor(b,r),b,r)))
%i.e. probability spot can be explained by gene relative to
%probability it can be explained by background alone.
pLogProbOverBackground;
%pSpotScore is for the sorted array pLogProb,
%pLogProb(1)-pLogProb(2)
pSpotScore;
%pSpotScoreDev(s) is the standard deviation of the log prob of spot s
%for all genes
pSpotScoreDev;
%pSpotCodeNo is the gene found for each spot
pSpotCodeNo;
%% PixelBased method parameters
%PixelFileMaxTiles is approximately the maximum number of tiles
%that can be stored in a single file. Output data to files so don't
%get memory problems.
PixelFileMaxTiles = 6;
%PixelFileNames contains the names of files in which pixel method data
%is stored
PixelFileNames;
%PixelDetectRadius is the radius of the filter used to find local
%maxima in gene images
PixelDetectRadius = 4;
%Have to do initial filtering so don't have too many points, only
%keep points with pxSpotScore>pxInitialScoreThresh or
%pxLogProbOverBackground>pxInitialProbThresh
pxInitialScoreThresh = 0; %I.e. only want spots that are at least 2nd best at their position
pxInitialProbThresh = -5;
%When finding probability for second best gene, remove best gene
%first in all rounds/channels where gene should be present or
%bleedthrough is more than pLogProb2RemoveGeneThresh.
pLogProb2RemoveGeneThresh = 0.25;
%When finding probability for second best gene, remove
%(1-DistScore) fraction of pxSpotBestGene(s) code from
%pxSpotColor(s,:) where DistScore = exp(-Dist.^2/pLogProb2DistParam^2);
%Dist being the distance to the nearest pxSpotBestGene(s) spot.
pLogProb2DistParam = 25;
%% PixelBased method outputs
% pxSpotGlobalYX(Spot,1:2) contains y,x coordinates of every spot in
% global coordiate system. Both combinatorial and extra spots
% pixel spots are at different locations.
pxSpotGlobalYX;
% pxSpotColors(Spot, Base, Round) contains spot color on each base
% and round. only for combinatorial splots
pxSpotColors;
%pxSpotIntensity is the modified spot intensity given by
%get_spot_intensity.m
pxSpotIntensity;
%pxLogProb is sum(ln(Prob(b,r))/ln(HistProb(SpotColor(b,r),b,r)))
%i.e. probability spot can be explained by gene relative to
%probability it can be explained by background alone.
pxLogProbOverBackground;
%pxSpotScore is pLogProb -max(pxLogProb(SpotCodeNo~=pSpotCodeNo))
pxSpotScore;
%pxSpotScoreDev(s) is the standard deviation of the log prob of spot s
%for all genes
pxSpotScoreDev;
%pxSpotCodeNo is the gene found for each spot
pxSpotCodeNo;
%pxLocalTile(s) is the tile spot s was found on
pxLocalTile;
%pxBestGene(s) is gene number of best gene at location of spot s.
pxSpotBestGene;
%pxLogProbOverBackground2(s) is LogProbOverBackground for spot s
%after pxBestGene(s) has been removed.
pxLogProbOverBackground2;
%pxSpotScore2(s) is pxLogProb2 -max(pxLogProb2(SpotCodeNo~=pSpotCodeNo))
pxSpotScore2;
end
end