-
Notifications
You must be signed in to change notification settings - Fork 95
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FIX] Fix z statistic computation in computefeats2 #644
base: main
Are you sure you want to change the base?
Changes from 22 commits
904bc01
8587b79
1617d2a
2453c78
94cc77e
b3baa52
487416d
b96c3b2
7061614
4820c19
1def734
ba3aa26
2ec3b3c
25b5ea3
8b03c2b
88d049d
adc2850
f8dfc8b
c679600
2e2c63b
a4d2c35
ae582b6
cd42798
d8bd355
4c85723
8cfa3ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -7,11 +7,70 @@ | |||||||
from scipy import stats | ||||||||
|
||||||||
from tedana import utils | ||||||||
from tedana.due import due, BibTeX, Doi | ||||||||
|
||||||||
LGR = logging.getLogger(__name__) | ||||||||
RepLGR = logging.getLogger('REPORT') | ||||||||
RefLGR = logging.getLogger('REFERENCES') | ||||||||
|
||||||||
T2Z_TRANSFORM = BibTeX(""" | ||||||||
@article{hughett2007accurate, | ||||||||
title={Accurate Computation of the F-to-z and t-to-z Transforms | ||||||||
for Large Arguments}, | ||||||||
author={Hughett, Paul and others}, | ||||||||
journal={Journal of Statistical Software}, | ||||||||
volume={23}, | ||||||||
number={1}, | ||||||||
pages={1--5}, | ||||||||
year={2007}, | ||||||||
publisher={Foundation for Open Access Statistics} | ||||||||
} | ||||||||
""") | ||||||||
T2Z_IMPLEMENTATION = Doi('10.5281/zenodo.32508') | ||||||||
|
||||||||
|
||||||||
@due.dcite(T2Z_TRANSFORM, | ||||||||
description='Introduces T-to-Z transform.') | ||||||||
@due.dcite(T2Z_IMPLEMENTATION, | ||||||||
description='Python implementation of T-to-Z transform.') | ||||||||
def t_to_z(t_values, dof):
    """
    Convert t-statistics to z-statistics for a given number of degrees
    of freedom, transforming each tail separately for numerical stability.

    From Vanessa Sochat's TtoZ package.
    Copyright (c) 2015 Vanessa Sochat
    MIT Licensed
    """
    # Accept any array-like input (lists, tuples, ndarrays).
    t_values = np.asanyarray(t_values)

    # Only transform the nonzero entries; zero t-values map to zero z-values
    # and are filled in at the end.
    nonzero_mask = (t_values != 0)
    t_nonzero = t_values[nonzero_mask]

    # Partition into the non-positive and positive tails so that the normal
    # quantile function is always evaluated on the lower (small-p) tail.
    is_nonpos = (t_nonzero <= 0)
    is_pos = (t_nonzero > 0)

    z_nonzero = np.zeros(len(t_nonzero))

    # t <= 0: map through the lower-tail CDF directly.
    p_nonpos = stats.t.cdf(t_nonzero[is_nonpos], df=dof)
    z_nonzero[is_nonpos] = stats.norm.ppf(p_nonpos)

    # t > 0: reflect onto the lower tail, transform, then flip the sign back.
    p_pos = stats.t.cdf(-t_nonzero[is_pos], df=dof)
    z_nonzero[is_pos] = -stats.norm.ppf(p_pos)

    # Scatter the transformed values back into an array shaped like the input.
    out = np.zeros(t_values.shape)
    out[nonzero_mask] = z_nonzero
    return out
|
||||||||
|
||||||||
def getfbounds(n_echos): | ||||||||
""" | ||||||||
|
@@ -34,7 +93,7 @@ def getfbounds(n_echos): | |||||||
return f05, f025, f01 | ||||||||
|
||||||||
|
||||||||
def computefeats2(data, mmix, mask=None, normalize=True): | ||||||||
def computefeats2(data, mmix, mask=None): | ||||||||
smoia marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
""" | ||||||||
Converts `data` to component space using `mmix` | ||||||||
Comment on lines
97
to
98
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
|
||||||||
|
@@ -56,12 +115,14 @@ def computefeats2(data, mmix, mask=None, normalize=True): | |||||||
Data in component space | ||||||||
""" | ||||||||
if data.ndim != 2: | ||||||||
raise ValueError('Parameter data should be 2d, not {0}d'.format(data.ndim)) | ||||||||
raise ValueError('Parameter data should be 2d, not ' | ||||||||
'{0}d'.format(data.ndim)) | ||||||||
elif mmix.ndim not in [2]: | ||||||||
raise ValueError('Parameter mmix should be 2d, not ' | ||||||||
'{0}d'.format(mmix.ndim)) | ||||||||
elif (mask is not None) and (mask.ndim != 1): | ||||||||
raise ValueError('Parameter mask should be 1d, not {0}d'.format(mask.ndim)) | ||||||||
raise ValueError('Parameter mask should be 1d, not ' | ||||||||
'{0}d'.format(mask.ndim)) | ||||||||
elif (mask is not None) and (data.shape[0] != mask.shape[0]): | ||||||||
raise ValueError('First dimensions (number of samples) of data ({0}) ' | ||||||||
'and mask ({1}) do not match.'.format(data.shape[0], | ||||||||
|
@@ -74,28 +135,20 @@ def computefeats2(data, mmix, mask=None, normalize=True): | |||||||
# demean masked data | ||||||||
if mask is not None: | ||||||||
data = data[mask, ...] | ||||||||
# normalize data (minus mean and divide by std) | ||||||||
data_vn = stats.zscore(data, axis=-1) | ||||||||
|
||||||||
# get betas of `data`~`mmix` and limit to range [-0.999, 0.999] | ||||||||
data_R = get_coeffs(data_vn, mmix, mask=None) | ||||||||
data_R[data_R < -0.999] = -0.999 | ||||||||
data_R[data_R > 0.999] = 0.999 | ||||||||
|
||||||||
# R-to-Z transform | ||||||||
data_Z = np.arctanh(data_R) | ||||||||
# get betas and z-values of `data`~`mmix` | ||||||||
# mmix is normalized internally | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should do it here anyway. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if we use In fact, I would set add_const default as True, given that if the data is not demeaned, you need the intercept, and if it's demeaned, the intercept doesn't bother you (it's 0). @CesarCaballeroGaudes is there any other difference between normalising before computing LS and adding the intercept in the model? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like including the intercept by default, but isn't variance normalization required to get standardized parameter estimates? |
||||||||
_, data_Z = get_ls_coeffs(data_vn, mmix, mask=None, add_const=False, | ||||||||
compute_zvalues=True) | ||||||||
if data_Z.ndim == 1: | ||||||||
data_Z = np.atleast_2d(data_Z).T | ||||||||
|
||||||||
# normalize data | ||||||||
if normalize: | ||||||||
data_Zm = stats.zscore(data_Z, axis=0) | ||||||||
data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / | ||||||||
data_Z.std(axis=0, keepdims=True)) | ||||||||
|
||||||||
return data_Z | ||||||||
|
||||||||
|
||||||||
def get_coeffs(data, X, mask=None, add_const=False): | ||||||||
def get_ls_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, min_df=1): | ||||||||
""" | ||||||||
Performs least-squares fit of `X` against `data` | ||||||||
|
||||||||
|
@@ -109,28 +162,37 @@ def get_coeffs(data, X, mask=None, add_const=False): | |||||||
Boolean mask array | ||||||||
add_const : bool, optional | ||||||||
Add intercept column to `X` before fitting. Default: False | ||||||||
compute_zvalues : bool, optional | ||||||||
Compute z-values of the betas (predictors) | ||||||||
min_df : integer, optional | ||||||||
Integer to give warning if # df <= min_df | ||||||||
|
||||||||
Returns | ||||||||
------- | ||||||||
betas : (S [x E] x C) :obj:`numpy.ndarray` | ||||||||
Array of `S` sample betas for `C` predictors | ||||||||
z_values : (S [x E] x C) :obj:`numpy.ndarray` | ||||||||
Array of `S` sample z-values for `C` predictors | ||||||||
|
||||||||
""" | ||||||||
if data.ndim not in [2, 3]: | ||||||||
raise ValueError('Parameter data should be 2d or 3d, not {0}d'.format(data.ndim)) | ||||||||
raise ValueError('Parameter data should be 2d or 3d, not ' | ||||||||
'{0}d'.format(data.ndim)) | ||||||||
elif X.ndim not in [2]: | ||||||||
raise ValueError('Parameter X should be 2d, not {0}d'.format(X.ndim)) | ||||||||
elif data.shape[-1] != X.shape[0]: | ||||||||
raise ValueError('Last dimension (dimension {0}) of data ({1}) does not ' | ||||||||
'match first dimension of ' | ||||||||
'X ({2})'.format(data.ndim, data.shape[-1], X.shape[0])) | ||||||||
raise ValueError('Last dimension (dimension {0}) of data ({1}) does ' | ||||||||
'not match first dimension of X ' | ||||||||
'({2})'.format(data.ndim, data.shape[-1], X.shape[0])) | ||||||||
|
||||||||
# mask data and flip (time x samples) | ||||||||
if mask is not None: | ||||||||
if mask.ndim not in [1, 2]: | ||||||||
raise ValueError('Parameter data should be 1d or 2d, not {0}d'.format(mask.ndim)) | ||||||||
raise ValueError('Parameter data should be 1d or 2d, not ' | ||||||||
'{0}d'.format(mask.ndim)) | ||||||||
elif data.shape[0] != mask.shape[0]: | ||||||||
raise ValueError('First dimensions of data ({0}) and mask ({1}) do not ' | ||||||||
'match'.format(data.shape[0], mask.shape[0])) | ||||||||
raise ValueError('First dimensions of data ({0}) and mask ({1}) do' | ||||||||
' not match'.format(data.shape[0], mask.shape[0])) | ||||||||
mdata = data[mask, :].T | ||||||||
else: | ||||||||
mdata = data.T | ||||||||
|
@@ -144,11 +206,45 @@ def get_coeffs(data, X, mask=None, add_const=False): | |||||||
if add_const: # add intercept, if specified | ||||||||
X = np.column_stack([X, np.ones((len(X), 1))]) | ||||||||
|
||||||||
# least squares estimation: beta = (X^T * X)^(-1) * X^T * mdata | ||||||||
# betas is transposed due to backward compatibility with rest of code. | ||||||||
betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T | ||||||||
|
||||||||
if compute_zvalues: | ||||||||
# compute t-values of betas (estimates) and then convert to z-values | ||||||||
# first compute number of degrees of freedom | ||||||||
df = mdata.shape[0] - X.shape[1] | ||||||||
if df == 0: | ||||||||
LGR.error('ERROR: No degrees of freedom left in least squares ' | ||||||||
'calculation. Stopping!!') | ||||||||
elif df <= min_df: | ||||||||
LGR.warning('Number of degrees of freedom in least-square ' | ||||||||
'estimation is less than {}'.format(min_df + 1)) | ||||||||
# compute sigma: | ||||||||
# RSS = sum{[mdata - (X * betas)]^2} | ||||||||
# sigma = RSS / Degrees_of_Freedom | ||||||||
sigma = np.sum(np.power(mdata - np.dot(X, betas.T), 2), axis=0) / df | ||||||||
sigma = sigma[:, np.newaxis] | ||||||||
# Compute std of betas: | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
# C = (X^T * X)_ii^(-1) | ||||||||
# std(betas) = sqrt(sigma * C) | ||||||||
C = np.diag(np.linalg.pinv(np.dot(X.T, X))) | ||||||||
C = C[:, np.newaxis] | ||||||||
std_betas = np.sqrt(np.dot(sigma, C.T)) | ||||||||
z_values = t_to_z(betas / std_betas, df) | ||||||||
z_values = np.clip(z_values, -40, 40) | ||||||||
|
||||||||
if add_const: # drop beta for intercept, if specified | ||||||||
betas = betas[:, :-1] | ||||||||
if compute_zvalues: | ||||||||
z_values = z_values[:, :-1] | ||||||||
|
||||||||
if mask is not None: | ||||||||
betas = utils.unmask(betas, mask) | ||||||||
if compute_zvalues: | ||||||||
z_values = utils.unmask(z_values, mask) | ||||||||
|
||||||||
return betas | ||||||||
if compute_zvalues: | ||||||||
return betas, z_values | ||||||||
else: | ||||||||
return betas |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just recommending the docstring I use in
NiMARE
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The actual code from NiMARE might also be good to use since I improved the internal variable names.