Add posthoc (#9)
* Adding post-hoc tests

* Reconfigured outputs for easier interpretation.

* Update to README.

* Unit tests for post hoc.
eribean authored Aug 25, 2021
1 parent 0c59a81 commit 60708c5
Showing 4 changed files with 278 additions and 1 deletion.
7 changes: 7 additions & 0 deletions README.md
@@ -2,6 +2,7 @@
[![codecov.io](https://codecov.io/gh/eribean/RyStats/coverage.svg?branch=main)](https://codecov.io/gh/eribean/RyStats)
[![CodeFactor](https://www.codefactor.io/repository/github/eribean/RyStats/badge)](https://www.codefactor.io/repository/github/eribean/RyStats)
[![PyPI version](https://badge.fury.io/py/RyStats.svg)](https://badge.fury.io/py/RyStats)
![PyPI - Downloads](https://img.shields.io/pypi/dm/RyStats?color=darkgreen)
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)


@@ -42,6 +43,12 @@ Plotting Tools require Bokeh and are made to be used within a Jupyter Notebook
3. Bokeh (for plotting)

# Installation
Via pip
```
pip install RyStats --upgrade
```

From Source
```
pip install . -t $PYTHONPATH --upgrade
```
3 changes: 2 additions & 1 deletion inferential/__init__.py
@@ -1,2 +1,3 @@
from .ttests import *
from .oneway_anova import *
from .oneway_anova import *
from .post_hoc import *
162 changes: 162 additions & 0 deletions inferential/post_hoc.py
@@ -0,0 +1,162 @@
import numpy as np
from scipy.stats import studentized_range

from RyStats.inferential import (equal_variance_oneway_anova,
equal_variance_ttest,
unequal_variance_ttest,
repeated_ttest)


__all__ = ['tukey_posthoc', 'games_howell_posthoc',
'bonferonni_posthoc']


def tukey_posthoc(*args):
"""Post-Hoc test to compute valid differences
Args:
group1: Input group 1 as a numpy array
group2: Input group 2 as a numpy array
groupN: Input group n as a numpy array
Returns:
P_values: p_value associated with mean difference
Delta Means: difference between the means
Lower CI : Lower 95th Confidence Interval
Upper CI : Upper 95th Confidence Interval
"""
k_groups = len(args)
p_values = np.full((k_groups, k_groups), 0.0)
delta_x = p_values.copy()
lower_ci = p_values.copy()
upper_ci = p_values.copy()

anova_output = equal_variance_oneway_anova(*args)
ms_error = anova_output['Within']['MS']
deg_of_freedom = anova_output['Within']['DF']

sigma_value = studentized_range.isf(0.05,
k_groups, deg_of_freedom)

for ndx2 in range(k_groups):
for ndx1 in range(ndx2+1, k_groups):
group1 = args[ndx1]
group2 = args[ndx2]

std_err1 = ms_error / np.count_nonzero(~np.isnan(group1))
std_err2 = ms_error / np.count_nonzero(~np.isnan(group2))
total_std_error = np.sqrt(0.5 * (std_err1 + std_err2))

deltaX = np.nanmean(group1) - np.nanmean(group2)
t_value = deltaX / total_std_error

p_value = studentized_range.sf(np.abs(t_value),
k_groups, deg_of_freedom)

p_values[ndx1, ndx2] = p_value
delta_x[ndx1, ndx2] = deltaX
lower_ci[ndx1, ndx2] = deltaX - sigma_value * total_std_error
upper_ci[ndx1, ndx2] = deltaX + sigma_value * total_std_error

return {'P_value': p_values,
'Delta Means': delta_x,
'Lower CI': lower_ci,
'Upper CI': upper_ci}


def games_howell_posthoc(*args):
"""Post-Hoc test to compute valid differences
Args:
group1: Input group 1 as a numpy array
group2: Input group 2 as a numpy array
groupN: Input group n as a numpy array
Returns:
P_values: p_value associated with mean difference
Delta Means: difference between the means
Lower CI : Lower 95th Confidence Interval
Upper CI : Upper 95th Confidence Interval
"""
k_groups = len(args)
p_values = np.full((k_groups, k_groups), 0.0)
delta_x = p_values.copy()
lower_ci = p_values.copy()
upper_ci = p_values.copy()

for ndx2 in range(k_groups):
for ndx1 in range(ndx2+1, k_groups):
group1 = args[ndx1]
group2 = args[ndx2]

group1_size = np.count_nonzero(~np.isnan(group1))
group2_size = np.count_nonzero(~np.isnan(group2))

std_err1 = np.nanvar(group1, ddof=1) / group1_size
std_err2 = np.nanvar(group2, ddof=1) / group2_size

total_std_error = np.sqrt(0.5 * (std_err1 + std_err2))
deg_of_freedom = (np.square(std_err1 + std_err2)
/ (std_err1**2 / (group1_size - 1)
+ std_err2**2 / (group2_size - 1)))

deltaX = np.nanmean(group1) - np.nanmean(group2)
t_value = deltaX / total_std_error

sigma_value = studentized_range.isf(0.05,
k_groups, deg_of_freedom)
p_value = studentized_range.sf(np.abs(t_value),
k_groups, deg_of_freedom)
p_values[ndx1, ndx2] = p_value
delta_x[ndx1, ndx2] = deltaX
lower_ci[ndx1, ndx2] = deltaX - sigma_value * total_std_error
upper_ci[ndx1, ndx2] = deltaX + sigma_value * total_std_error

return {'P_value': p_values,
'Delta Means': delta_x,
'Lower CI': lower_ci,
'Upper CI': upper_ci}


def bonferonni_posthoc(*args, ttest_type='equal'):
"""Computes T-Tests between groups, should adjust your
significance value, a.k.a. bonferonni correction to
minimize family wide error rates
Args:
group1: Input group 1 as a numpy array
group2: Input group 2 as a numpy array
groupN: Input group n as a numpy array
ttest_type: [('equal') | 'unequal' | 'repeated'] string
of which type of ttest to perform
Returns:
P_values: p_value associated with mean difference
Delta Means: difference between the means
Lower CI : Lower 95th Confidence Interval
Upper CI : Upper 95th Confidence Interval
"""
k_groups = len(args)
ttest = {'equal': equal_variance_ttest,
'unequal': unequal_variance_ttest,
'repeated': repeated_ttest}[ttest_type.lower()]

p_values = np.full((k_groups, k_groups), 0.0)
delta_x = p_values.copy()
lower_ci = p_values.copy()
upper_ci = p_values.copy()

for ndx2 in range(k_groups):
for ndx1 in range(ndx2+1, k_groups):
result = ttest(args[ndx1], args[ndx2])
deltaX = result['Mean1'] - result['Mean2']

delta_x[ndx1, ndx2] = deltaX
p_values[ndx1, ndx2] = result['P_value']
lower_ci[ndx1, ndx2] = result['95th CI'][0]
upper_ci[ndx1, ndx2] = result['95th CI'][1]

return {'P_value': p_values,
'Delta Means': delta_x,
'Lower CI': lower_ci,
'Upper CI': upper_ci}
107 changes: 107 additions & 0 deletions inferential/test/test_posthoc.py
@@ -0,0 +1,107 @@
import unittest

import numpy as np

from RyStats.inferential import tukey_posthoc, games_howell_posthoc, bonferonni_posthoc


class TestPostHocMethods(unittest.TestCase):
"""Test Fixture for Post Hoc Methods."""

def setUp(self):
rng = np.random.default_rng(3439857390857420398471098273)
self.dataset = [rng.standard_normal(250) for _ in range(4)]
self.dataset2 = [rng.normal(0, 1 + ndx/4, 250) for ndx in range(4)]

def test_tukey_posthoc(self):
"""Testing Tukey Post-Hoc."""
rr, cc = np.tril_indices(4, k=-1)

result = tukey_posthoc(*self.dataset)

# Regression Test
expected_p_value = np.array([0.93949605, 0.67821687, 0.9483563,
0.99940972, 0.96826012, 0.74926386])

expected_lower_ci = np.array([-0.29021765, -0.34029182, -0.28727278,
-0.24809237, -0.19507333, -0.14499915])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-4)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-4)

def test_games_howell_posthoc(self):
"""Testing Games Howell Post Hoc."""
rr, cc = np.tril_indices(4, k=-1)

result = games_howell_posthoc(*self.dataset)

# Regression Test
expected_p_value = np.array([0.942161, 0.689703, 0.945025,
0.999446, 0.96665 , 0.739303])

expected_lower_ci = np.array([-0.294567, -0.344686, -0.282424,
-0.253633, -0.191418, -0.14139])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-4)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-4)

def test_equal_bonferonni_posthoc(self):
"""Testing Bonferonni Post Hoc Equal."""
rr, cc = np.tril_indices(4, k=-1)

result = bonferonni_posthoc(*self.dataset2, ttest_type='equal')

# Regression Test
expected_p_value = np.array([0.237767, 0.561773, 0.717279,
0.84575 , 0.295532, 0.533447])

expected_lower_ci = np.array([-0.303003, -0.298203, -0.20255,
-0.217053, -0.120646, -0.197812])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-5)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-5)

def test_unequal_bonferonni_posthoc(self):
"""Testing Games Howell Post Hoc UnEqual."""
rr, cc = np.tril_indices(4, k=-1)

result = bonferonni_posthoc(*self.dataset2, ttest_type='unequal')

# Regression Test
expected_p_value = np.array([0.2378 , 0.561835, 0.717291,
0.845772, 0.295589, 0.533448])

expected_lower_ci = np.array([-0.303031, -0.298337, -0.202597,
-0.21722 , -0.120716, -0.197815])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-4)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-4)

def test_repeated_bonferonni_posthoc(self):
"""Testing Games Howell Post Hoc Repeated."""
rr, cc = np.tril_indices(4, k=-1)

result = bonferonni_posthoc(*self.dataset2, ttest_type='repeated')

# Regression Test
expected_p_value = np.array([0.228903, 0.551281, 0.716359,
0.843387, 0.310797, 0.545441])

expected_lower_ci = np.array([-0.299669, -0.292557, -0.202171,
-0.213828, -0.12932 , -0.207034])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-4)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-4)

if __name__ == "__main__":
unittest.main()
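
A quick sketch for running only the new tests from the repository root, assuming the test directory is importable as a package:
```
import unittest

# Discover the post-hoc tests under inferential/test and run them
suite = unittest.defaultTestLoader.discover("inferential/test", pattern="test_posthoc.py")
unittest.TextTestRunner(verbosity=2).run(suite)
```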
