-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathstats.py
482 lines (397 loc) · 21.2 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
#!/usr/bin/env python
# coding=utf-8
"""
Statistical functions
| Option | Description |
| ------ | ----------- |
| title | stats.py |
| authors | Florence Brun, Guillaume Dumas |
| date | 2020-03-18 |
"""
from collections import namedtuple
from typing import List, Tuple
import numpy as np
import scipy
import matplotlib.pylab as plt
import mne
from mne.channels import find_ch_adjacency
from mne.stats import permutation_cluster_test
def statsCond(data: np.ndarray, epochs: mne.Epochs, n_permutations: int, alpha: float) -> tuple:
    """
    Computes a one-sample permutation t test on a participant measure
    (e.g. PSD) for one condition, with FDR correction across channels.

    Arguments:
        data: array of participants measure (e.g. PSD) for
          a condition, shape (n_samples, n_tests, n_freq) with n_tests
          the channels. Values are averaged over the frequency axis
          before testing.
        epochs: Epochs object for a condition from a random participant.
          Kept for API compatibility; the computation itself does not
          use it.
        n_permutations: the number of permutations, int. Should be at
          least 2*n_samples, can be set to 50000 for example.
        alpha: the threshold for the t test, float, can be set to 0.05.

    Note:
        This t test assesses whether the observed mean significantly
        deviates from 0; it does not compare two periods, but one period
        with the null hypothesis. Randomized data are generated with
        random sign flips. The tail is set to 0 (two-sided alternative).
        To reduce false positives due to multiple comparisons, False
        Discovery Rate (FDR) correction is applied to the p values.
        The frequency dimension is reduced to one for the test (average
        in the frequency band-of-interest). To take frequencies into
        account, use cluster statistics (see statscondCluster in the
        toolbox). For visualization, use plot_significant_sensors.

    Returns:
        T_obs, p_values, H0, adj_p, T_obs_plot:
        - T_obs: T-statistic observed for all variables, array of shape
          (n_tests,).
        - p_values: uncorrected p-values for all the tests, array of
          shape (n_tests,).
        - H0: T-statistic obtained by permutations and t-max trick for
          multiple comparisons, array of shape (n_permutations,).
        - adj_p: FDR output, tuple of (boolean rejection array,
          corrected p values), each of shape (n_tests,).
        - T_obs_plot: statistical values to plot, T values of sensors
          whose corrected p value passes alpha and 0 elsewhere, array of
          shape (n_tests,).
    """
    # data must be (n_samples, n_channels, n_freq)
    assert len(data.shape) == 3, "PSD does not have the appropriate shape!"
    # averaging across frequencies (compute stats only in channel space)
    power = np.mean(data, axis=2)
    T_obs, p_values, H0 = mne.stats.permutation_t_test(power, n_permutations,
                                                       tail=0, n_jobs=1)
    # FDR correction; 'indep' assumes independent or positively
    # correlated tests (Benjamini-Hochberg)
    adj_p = mne.stats.fdr_correction(p_values, alpha=alpha, method='indep')
    # keep T values only for sensors whose corrected p value passes alpha,
    # zero elsewhere (vectorized version of the per-value masking loop)
    T_obs_plot = np.where(adj_p[1] <= alpha, T_obs, 0.0)
    statsCondTuple = namedtuple(
        'statsCond', ['T_obs', 'p_values', 'H0', 'adj_p', 'T_obs_plot'])
    return statsCondTuple(
        T_obs=T_obs,
        p_values=p_values,
        H0=H0,
        adj_p=adj_p,
        T_obs_plot=T_obs_plot)
def con_matrix(epochs: mne.Epochs, freqs_mean: List[float], draw: bool = False) -> tuple:
    """
    Computes a priori channel connectivity across space and frequencies.

    Arguments:
        epochs: one participant Epochs object; only the channel
          information is used.
        freqs_mean: list of frequencies in frequency-band-of-interest
          used by MNE for power or coherence spectral density calculation.
        draw: option to plot the connectivity matrices, boolean.

    Returns:
        ch_con, ch_con_freq:
        - ch_con: connectivity matrix between channels along space based
          on their position, scipy.sparse.csr_matrix of shape
          (n_channels, n_channels).
        - ch_con_freq: connectivity matrix between channels along space
          and frequencies, array of shape
          (n_channels*len(freqs_mean), n_channels*len(freqs_mean)).
    """
    # channel-to-channel adjacency matrix in sensor space
    ch_con, ch_names_con = find_ch_adjacency(epochs.info,
                                             ch_type='eeg')
    ch_con_arr = ch_con.toarray()
    l_freq = len(freqs_mean)
    # frequency-band adjacency: each frequency bin is connected to itself
    # and to its immediate neighbours (tridiagonal band of ones)
    freq_idx = np.arange(l_freq)
    band = (np.abs(freq_idx[:, None] - freq_idx[None, :]) <= 1).astype(float)
    # Kronecker product replicates the spatial adjacency into every
    # neighbouring-frequency block. This is equivalent to masking a tiled
    # copy of ch_con_arr, but replaces the original
    # O((n_channels*l_freq)^2) Python double loop with one vectorized op.
    ch_con_freq = np.kron(band, ch_con_arr)
    if draw:
        plt.figure()
        # visualizing the matrices
        plt.subplot(1, 2, 1)
        plt.spy(ch_con)
        plt.title("Connectivity matrix")
        plt.subplot(1, 2, 2)
        plt.spy(ch_con_freq)
        plt.title("Meta-connectivity matrix")
    con_matrixTuple = namedtuple('con_matrix', ['ch_con', 'ch_con_freq'])
    return con_matrixTuple(
        ch_con=ch_con,
        ch_con_freq=ch_con_freq)
def metaconn_matrix_2brains(electrodes: List[Tuple[int, int]], ch_con: scipy.sparse.csr_matrix, freqs_mean: List[float], plot: bool = False) -> tuple:
    """
    Computes a priori connectivity across space and frequencies
    between pairs of channels for which connectivity indices have
    been calculated, to merge data (2 brains).

    Arguments:
        electrodes: electrode pairs for which connectivity indices have
          been computed, list of tuples with channel indices, see
          indices_connectivity_interbrain function in toolbox (analyses).
          The second index of each pair carries the second participant's
          channel offset.
        ch_con: connectivity matrix between channels along space based on
          their position, scipy.sparse.csr_matrix of shape
          (n_channels, n_channels).
        freqs_mean: list of frequencies in the frequency-band-of-interest
          used by MNE for coherence spectral density calculation
          (connectivity indices).
        plot: option to plot the connectivity matrix, boolean.

    Note:
        It is assumed that there was no a priori connectivity
        between channels from the two participants.

    Returns:
        metaconn, metaconn_freq:
        - metaconn: a priori connectivity based on channel location,
          between pairs of channels for which connectivity indices have
          been calculated, matrix of shape (len(electrodes), len(electrodes)).
        - metaconn_freq: a priori connectivity between pairs of channels
          for which connectivity indices have been calculated, across
          space and frequencies, matrix of shape
          (len(electrodes)*len(freqs_mean), len(electrodes)*len(freqs_mean)).
    """
    # offset added to the second participant's channel indices: one more
    # than the largest first-participant index seen in the pairs
    n = np.max(electrodes, axis=0)[0] + 1
    n_pairs = len(electrodes)
    metaconn = np.zeros((n_pairs, n_pairs))
    for ne1, (e11, e12) in enumerate(electrodes):
        for ne2, (e21, e22) in enumerate(electrodes):
            # two pairs are a priori connected when, within each brain,
            # the channels are spatially adjacent or identical;
            # no a priori connectivity between the 2 brains is considered
            metaconn[ne1, ne2] = (((ch_con[e11, e21]) and (ch_con[e12-n, e22-n])) or
                                  ((ch_con[e11, e21]) and (e12 == e22)) or
                                  ((ch_con[e12-n, e22-n]) and (e11 == e21)) or
                                  ((e12 == e22) and (e11 == e21)))
    l_freq = len(freqs_mean)
    # frequency-band adjacency: each frequency bin is connected to itself
    # and to its immediate neighbours (tridiagonal band of ones)
    freq_idx = np.arange(l_freq)
    band = (np.abs(freq_idx[:, None] - freq_idx[None, :]) <= 1).astype(float)
    # Kronecker product replicates metaconn into every
    # neighbouring-frequency block; equivalent to the original masked
    # np.tile, without the O((len(electrodes)*l_freq)^2) Python loop
    metaconn_freq = np.kron(band, metaconn)
    if plot:
        # visualizing the array
        plt.figure()
        plt.spy(metaconn_freq)
        plt.title("Meta-connectivity matrix")
    metaconn_matrix_2brainsTuple = namedtuple(
        'metaconn_matrix_2brains', ['metaconn', 'metaconn_freq'])
    return metaconn_matrix_2brainsTuple(
        metaconn=metaconn,
        metaconn_freq=metaconn_freq)
def metaconn_matrix(electrodes: List[Tuple[int, int]], ch_con: scipy.sparse.csr_matrix, freqs_mean: List[float], plot: bool = False) -> tuple:
    """
    Computes a priori connectivity between pairs of sensors for which
    connectivity indices have been calculated, across space and
    frequencies (based on channel location).

    Arguments:
        electrodes: electrode pairs for which connectivity has been
          computed, list of tuples with channel indices, see
          indices_connectivity intrabrain function in toolbox (analyses).
        ch_con: connectivity matrix between sensors along space based on
          their position, scipy.sparse.csr_matrix of shape
          (n_channels, n_channels).
        freqs_mean: list of frequencies in the frequency-band-of-interest
          used by MNE for coherence spectral density calculation
          (connectivity indices).
        plot: option to plot the meta-connectivity matrix, boolean.
          Defaults to False (previously the matrix was always plotted;
          gated per the original TODO and for consistency with
          metaconn_matrix_2brains).

    Returns:
        metaconn, metaconn_freq:
        - metaconn: a priori connectivity based on channel location,
          between pairs of channels for which connectivity indices have
          been calculated, matrix of shape (len(electrodes), len(electrodes)).
        - metaconn_freq: a priori connectivity between pairs of channels
          for which connectivity indices have been calculated, across
          space and frequencies, matrix of shape
          (len(electrodes)*len(freqs_mean), len(electrodes)*len(freqs_mean)).
    """
    n_pairs = len(electrodes)
    metaconn = np.zeros((n_pairs, n_pairs))
    for ne1, (e11, e12) in enumerate(electrodes):
        for ne2, (e21, e22) in enumerate(electrodes):
            # two pairs are a priori connected when their channels are
            # spatially adjacent or shared, in either pairing order
            metaconn[ne1, ne2] = (((ch_con[e11, e21]) and (ch_con[e12, e22])) or
                                  ((ch_con[e11, e22]) and (ch_con[e12, e21])) or
                                  ((ch_con[e11, e21]) and (e12 == e22)) or
                                  ((ch_con[e11, e22]) and (e12 == e21)) or
                                  ((ch_con[e12, e21]) and (e11 == e22)) or
                                  ((ch_con[e12, e22]) and (e11 == e21)))
    l_freq = len(freqs_mean)
    # frequency-band adjacency: each frequency bin is connected to itself
    # and to its immediate neighbours (tridiagonal band of ones)
    freq_idx = np.arange(l_freq)
    band = (np.abs(freq_idx[:, None] - freq_idx[None, :]) <= 1).astype(float)
    # Kronecker product replicates metaconn into every
    # neighbouring-frequency block; equivalent to the original masked
    # np.tile, without the O((len(electrodes)*l_freq)^2) Python loop
    metaconn_freq = np.kron(band, metaconn)
    if plot:
        # visualizing the array
        plt.figure()
        plt.spy(metaconn_freq)
        plt.title("Meta-connectivity matrix")
    metaconn_matrixTuple = namedtuple(
        'metaconn_matrix', ['metaconn', 'metaconn_freq'])
    return metaconn_matrixTuple(
        metaconn=metaconn,
        metaconn_freq=metaconn_freq)
def statscondCluster(data: list, freqs_mean: list, ch_con_freq: scipy.sparse.csr_matrix, tail: int, n_permutations: int, alpha: float = 0.05) -> tuple:
    """
    Computes cluster-level statistical permutation test, corrected with
    channel connectivity across space and frequencies.

    Arguments:
        data: values from different conditions or different groups to
          compare, list of arrays (3d for time-frequency power or
          connectivity values).
        freqs_mean: frequencies in frequency-band-of-interest used by MNE
          for PSD or CSD calculation, list.
        ch_con_freq: connectivity or metaconnectivity matrix for PSD or
          CSD values to assess a priori connectivity between channels
          across space and frequencies based on their position.
        tail: direction of the t test, can be set to 1, 0 or -1.
        n_permutations: number of permutations computed, can be set
          to 50000.
        alpha: threshold to consider clusters significant, can be set to
          0.05 or less.

    Returns:
        F_obs, clusters, cluster_p_values, H0, F_obs_plot:
        - F_obs: statistic (F by default) observed for all variables,
          array of shape (n_tests,).
        - clusters: list of boolean arrays, each with the same shape as
          the input data, True values indicating locations that are part
          of that cluster.
        - cluster_p_values: p-value for each cluster, array.
        - H0: max cluster level stats observed under permutation, array
          of shape (n_permutations,).
        - F_obs_plot: statistical values above alpha threshold, to plot
          significant sensors (see plot_significant_sensors function in
          the toolbox), array of shape (n_tests,).
    """
    # cluster permutation test; threshold=None lets MNE derive the
    # cluster-forming threshold automatically
    F_obs, clusters, cluster_p_values, H0 = permutation_cluster_test(data,
                                                                     threshold=None,
                                                                     n_permutations=n_permutations,
                                                                     tail=tail, adjacency=ch_con_freq,
                                                                     t_power=1, out_type='mask')
    # t_power = 1 weighs each location by its statistical score,
    # when set to 0 it gives a count of locations in each cluster
    # gathering F values for sensors belonging to significant clusters;
    # iterate clusters and p values together so that clusters sharing the
    # same p value are all taken into account (the original lookup by
    # value always resolved to the first matching cluster)
    F_obs_plot = np.zeros(F_obs.shape)
    for cluster_mask, cluster_p in zip(clusters, cluster_p_values):
        if cluster_p <= alpha:
            F_values = cluster_mask.astype('uint8') * F_obs
            # taking maximum F value if a sensor belongs to many clusters
            F_obs_plot = np.maximum(F_obs_plot, F_values)
    statscondClusterTuple = namedtuple('statscondCluster', [
        'F_obs', 'clusters', 'cluster_p_values', 'H0', 'F_obs_plot'])
    return statscondClusterTuple(
        F_obs=F_obs,
        clusters=clusters,
        cluster_p_values=cluster_p_values,
        H0=H0,
        F_obs_plot=F_obs_plot)
def statscluster(data: list, test: str, factor_levels: List[int], ch_con_freq: scipy.sparse.csr_matrix, tail: int, n_permutations: int, alpha: float = 0.05) -> tuple:
    """
    Computes cluster-level statistical permutation test, corrected with
    channel connectivity across space and frequencies to compare groups
    or conditions for simple or multiple comparisons.

    Arguments:
        data: values from different groups or conditions to compare,
          list of arrays (3d for time-frequency power or connectivity
          values), or np.array for f multiple-way ANOVA test. For this
          test and the paired t test, samples must have the same
          dimension.
        test: nature of the test used to compare groups or conditions.
          Can be a t test for independent or paired samples
          ('ind ttest' or 'rel ttest'), a one-way ANOVA test
          ('f oneway'), or a multiple-way ANOVA test ('f multipleway'),
          str.
        factor_levels: for multiple-way ANOVA test, describes the number
          of levels for each factor, list (if comparing 2 groups and
          2 conditions, factor_levels = [2, 2] and data should be an
          np.array ordered group1-condition1, group1-condition2,
          group2-condition1, group2-condition2). Set to None otherwise.
        ch_con_freq: connectivity or metaconnectivity matrix for PSD or
          CSD values to assess a priori connectivity between channels
          across space and frequencies based on their position.
        tail: direction of the t test, can be set to 1, 0 or -1. The
          tail must be set to 0 for a one-way ANOVA test and to 1 for a
          multiple-way ANOVA test.
        n_permutations: number of permutations computed, can be set
          to 50000.
        alpha: threshold to consider clusters significant, defaults to
          0.05. For the multiple-way ANOVA test an F threshold adapted
          to the number of observations is computed instead.

    Notes:
        With t_power set to 1, each location is weighted by its
        statistical score in a cluster.
        For a f multipleway ANOVA test with connectivity values, the
        last dimensions have to be flattened into a vector instead of
        the shape (n_sensors, n_sensors); you can use np.reshape.

    Raises:
        ValueError: if `test` is not one of the supported test names.

    Returns:
        Stat_obs, clusters, cluster_p_values, H0, Stat_obs_plot:
        - Stat_obs: statistic (T or F values according to 'test')
          observed for all variables, array of shape (n_tests,).
        - clusters: list of boolean arrays, each with the same shape as
          the input data, True values indicating locations that are part
          of that cluster.
        - cluster_p_values: p-value for each cluster, array.
        - H0: max cluster level stats observed under permutation, array
          of shape (n_permutations,).
        - Stat_obs_plot: statistical values above alpha threshold, to
          plot significant sensors (see plot_significant_sensors
          function in the toolbox), array of shape (n_tests,).
    """
    # selecting the statistic function and the cluster-forming threshold
    if test == 'ind ttest':
        def stat_fun(*args):
            # Welch t test for independent samples
            return scipy.stats.ttest_ind(args[0], args[1], equal_var=False)[0]
        # NOTE(review): alpha is passed directly as the cluster-forming
        # threshold on the statistic, as in the original code — confirm
        # this is intended rather than a t/F value derived from alpha
        threshold = alpha
    elif test == 'rel ttest':
        def stat_fun(*args):
            return scipy.stats.ttest_rel(args[0], args[1])[0]
        threshold = alpha
    elif test == 'f oneway':
        def stat_fun(*args):
            return scipy.stats.f_oneway(args[0], args[1])[0]
        threshold = alpha
    elif test == 'f multipleway':
        # numerical correction is only applied when a factor has more
        # than two levels
        correction = max(factor_levels) > 2

        def stat_fun(*args):
            # bug fix: the original closure took *arg but referenced an
            # undefined name 'args', raising NameError when called
            return mne.stats.f_mway_rm(np.swapaxes(args, 1, 0),
                                       factor_levels,
                                       effects='all',
                                       correction=correction,
                                       return_pvals=False)[0]
        threshold = mne.stats.f_threshold_mway_rm(n_subjects=data.shape[1],
                                                  factor_levels=factor_levels,
                                                  effects='all',
                                                  pvalue=0.05)
    else:
        raise ValueError(
            "test must be 'ind ttest', 'rel ttest', 'f oneway' "
            "or 'f multipleway'")
    # computing the cluster permutation test
    Stat_obs, clusters, cluster_p_values, H0 = permutation_cluster_test(data,
                                                                        stat_fun=stat_fun,
                                                                        threshold=threshold,
                                                                        tail=tail,
                                                                        n_permutations=n_permutations,
                                                                        adjacency=ch_con_freq,
                                                                        t_power=1,
                                                                        out_type='mask')
    # gathering statistical values for sensors in significant clusters;
    # iterate clusters and p values together so that clusters sharing the
    # same p value are all taken into account (the original lookup by
    # value always resolved to the first matching cluster)
    Stat_obs_plot = np.zeros(Stat_obs.shape)
    for cluster_mask, cluster_p in zip(clusters, cluster_p_values):
        if cluster_p <= alpha:
            Stat_values = cluster_mask.astype('uint8') * Stat_obs
            # taking maximum statistical value if a sensor is in many clusters
            Stat_obs_plot = np.maximum(Stat_obs_plot, Stat_values)
    statscondClusterTuple = namedtuple('statscondCluster', [
        'Stat_obs', 'clusters', 'cluster_p_values', 'H0', 'Stat_obs_plot'])
    return statscondClusterTuple(
        Stat_obs=Stat_obs,
        clusters=clusters,
        cluster_p_values=cluster_p_values,
        H0=H0,
        Stat_obs_plot=Stat_obs_plot)