Add posthoc (#9)
* Adding post-hoc tests

* Reconfigured outputs for easier interpretation.

* Update to README.

* Unit tests for post hoc.
eribean authored Aug 25, 2021
1 parent 0c59a81 commit 60708c5
Showing 4 changed files with 278 additions and 1 deletion.
7 changes: 7 additions & 0 deletions README.md
@@ -2,6 +2,7 @@
[![codecov.io](https://codecov.io/gh/eribean/RyStats/coverage.svg?branch=main)](https://codecov.io/gh/eribean/RyStats)
[![CodeFactor](https://www.codefactor.io/repository/github/eribean/RyStats/badge)](https://www.codefactor.io/repository/github/eribean/RyStats)
[![PyPI version](https://badge.fury.io/py/RyStats.svg)](https://badge.fury.io/py/RyStats)
![PyPI - Downloads](https://img.shields.io/pypi/dm/RyStats?color=darkgreen)
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)


@@ -42,6 +43,12 @@ Plotting Tools require Bokeh and are made to be used within a Jupyter Notebook
3. Bokeh (for plotting)

# Installation
Via pip
```
pip install RyStats --upgrade
```

From Source
```
pip install . -t $PYTHONPATH --upgrade
```
3 changes: 2 additions & 1 deletion inferential/__init__.py
@@ -1,2 +1,3 @@
from .ttests import *
from .oneway_anova import *
from .oneway_anova import *
from .post_hoc import *
162 changes: 162 additions & 0 deletions inferential/post_hoc.py
@@ -0,0 +1,162 @@
import numpy as np
from scipy.stats import studentized_range

from RyStats.inferential import (equal_variance_oneway_anova,
equal_variance_ttest,
unequal_variance_ttest,
repeated_ttest)


__all__ = ['tukey_posthoc', 'games_howell_posthoc',
'bonferonni_posthoc']


def tukey_posthoc(*args):
"""Post-Hoc test to compute valid differences
Args:
group1: Input group 1 as a numpy array
group2: Input group 2 as a numpy array
groupN: Input group n as a numpy array
Returns:
P_values: p_value associated with mean difference
Delta Means: difference between the means
Lower CI : Lower 95th Confidence Interval
Upper CI : Upper 95th Confidence Interval
"""
k_groups = len(args)
p_values = np.full((k_groups, k_groups), 0.0)
delta_x = p_values.copy()
lower_ci = p_values.copy()
upper_ci = p_values.copy()

anova_output = equal_variance_oneway_anova(*args)
ms_error = anova_output['Within']['MS']
deg_of_freedom = anova_output['Within']['DF']

sigma_value = studentized_range.isf(0.05,
k_groups, deg_of_freedom)

for ndx2 in range(k_groups):
for ndx1 in range(ndx2+1, k_groups):
group1 = args[ndx1]
group2 = args[ndx2]

std_err1 = ms_error / np.count_nonzero(~np.isnan(group1))
std_err2 = ms_error / np.count_nonzero(~np.isnan(group2))
total_std_error = np.sqrt(0.5 * (std_err1 + std_err2))

deltaX = np.nanmean(group1) - np.nanmean(group2)
t_value = deltaX / total_std_error

p_value = studentized_range.sf(np.abs(t_value),
k_groups, deg_of_freedom)

p_values[ndx1, ndx2] = p_value
delta_x[ndx1, ndx2] = deltaX
lower_ci[ndx1, ndx2] = deltaX - sigma_value * total_std_error
upper_ci[ndx1, ndx2] = deltaX + sigma_value * total_std_error

return {'P_value': p_values,
'Delta Means': delta_x,
'Lower CI': lower_ci,
'Upper CI': upper_ci}


def games_howell_posthoc(*args):
"""Post-Hoc test to compute valid differences
Args:
group1: Input group 1 as a numpy array
group2: Input group 2 as a numpy array
groupN: Input group n as a numpy array
Returns:
P_values: p_value associated with mean difference
Delta Means: difference between the means
Lower CI : Lower 95th Confidence Interval
Upper CI : Upper 95th Confidence Interval
"""
k_groups = len(args)
p_values = np.full((k_groups, k_groups), 0.0)
delta_x = p_values.copy()
lower_ci = p_values.copy()
upper_ci = p_values.copy()

for ndx2 in range(k_groups):
for ndx1 in range(ndx2+1, k_groups):
group1 = args[ndx1]
group2 = args[ndx2]

group1_size = np.count_nonzero(~np.isnan(group1))
group2_size = np.count_nonzero(~np.isnan(group2))

std_err1 = np.nanvar(group1, ddof=1) / group1_size
std_err2 = np.nanvar(group2, ddof=1) / group2_size

total_std_error = np.sqrt(0.5 * (std_err1 + std_err2))
deg_of_freedom = (np.square(std_err1 + std_err2)
/ (std_err1**2 / (group1_size - 1)
+ std_err2**2 / (group2_size - 1)))

deltaX = np.nanmean(group1) - np.nanmean(group2)
t_value = deltaX / total_std_error

sigma_value = studentized_range.isf(0.05,
k_groups, deg_of_freedom)
p_value = studentized_range.sf(np.abs(t_value),
k_groups, deg_of_freedom)
p_values[ndx1, ndx2] = p_value
delta_x[ndx1, ndx2] = deltaX
lower_ci[ndx1, ndx2] = deltaX - sigma_value * total_std_error
upper_ci[ndx1, ndx2] = deltaX + sigma_value * total_std_error

return {'P_value': p_values,
'Delta Means': delta_x,
'Lower CI': lower_ci,
'Upper CI': upper_ci}


def bonferonni_posthoc(*args, ttest_type='equal'):
"""Computes T-Tests between groups, should adjust your
significance value, a.k.a. bonferonni correction to
minimize family wide error rates
Args:
group1: Input group 1 as a numpy array
group2: Input group 2 as a numpy array
groupN: Input group n as a numpy array
ttest_type: [('equal') | 'unequal' | 'repeated'] string
of which type of ttest to perform
Returns:
P_values: p_value associated with mean difference
Delta Means: difference between the means
Lower CI : Lower 95th Confidence Interval
Upper CI : Upper 95th Confidence Interval
"""
k_groups = len(args)
ttest = {'equal': equal_variance_ttest,
'unequal': unequal_variance_ttest,
'repeated': repeated_ttest}[ttest_type.lower()]

p_values = np.full((k_groups, k_groups), 0.0)
delta_x = p_values.copy()
lower_ci = p_values.copy()
upper_ci = p_values.copy()

for ndx2 in range(k_groups):
for ndx1 in range(ndx2+1, k_groups):
result = ttest(args[ndx1], args[ndx2])
deltaX = result['Mean1'] - result['Mean2']

delta_x[ndx1, ndx2] = deltaX
p_values[ndx1, ndx2] = result['P_value']
lower_ci[ndx1, ndx2] = result['95th CI'][0]
upper_ci[ndx1, ndx2] = result['95th CI'][1]

return {'P_value': p_values,
'Delta Means': delta_x,
'Lower CI': lower_ci,
'Upper CI': upper_ci}
107 changes: 107 additions & 0 deletions inferential/test/test_posthoc.py
@@ -0,0 +1,107 @@
import unittest

import numpy as np

from RyStats.inferential import tukey_posthoc, games_howell_posthoc, bonferonni_posthoc


class TestPostHocMethods(unittest.TestCase):
"""Test Fixture for Post Hoc Methods."""

def setUp(self):
rng = np.random.default_rng(3439857390857420398471098273)
self.dataset = [rng.standard_normal(250) for _ in range(4)]
self.dataset2 = [rng.normal(0, 1 + ndx/4, 250) for ndx in range(4)]

def test_tukey_posthoc(self):
"""Testing Tukey Post-Hoc."""
rr, cc = np.tril_indices(4, k=-1)

result = tukey_posthoc(*self.dataset)

# Regression Test
expected_p_value = np.array([0.93949605, 0.67821687, 0.9483563,
0.99940972, 0.96826012, 0.74926386])

expected_lower_ci = np.array([-0.29021765, -0.34029182, -0.28727278,
-0.24809237, -0.19507333, -0.14499915])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-4)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-4)

def test_games_howell_posthoc(self):
"""Testing Games Howell Post Hoc."""
rr, cc = np.tril_indices(4, k=-1)

result = games_howell_posthoc(*self.dataset)

# Regression Test
expected_p_value = np.array([0.942161, 0.689703, 0.945025,
0.999446, 0.96665 , 0.739303])

expected_lower_ci = np.array([-0.294567, -0.344686, -0.282424,
-0.253633, -0.191418, -0.14139])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-4)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-4)

def test_equal_bonferonni_posthoc(self):
"""Testing Bonferonni Post Hoc Equal."""
rr, cc = np.tril_indices(4, k=-1)

result = bonferonni_posthoc(*self.dataset2, ttest_type='equal')

# Regression Test
expected_p_value = np.array([0.237767, 0.561773, 0.717279,
0.84575 , 0.295532, 0.533447])

expected_lower_ci = np.array([-0.303003, -0.298203, -0.20255,
-0.217053, -0.120646, -0.197812])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-5)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-5)

def test_unequal_bonferonni_posthoc(self):
"""Testing Games Howell Post Hoc UnEqual."""
rr, cc = np.tril_indices(4, k=-1)

result = bonferonni_posthoc(*self.dataset2, ttest_type='unequal')

# Regression Test
expected_p_value = np.array([0.2378 , 0.561835, 0.717291,
0.845772, 0.295589, 0.533448])

expected_lower_ci = np.array([-0.303031, -0.298337, -0.202597,
-0.21722 , -0.120716, -0.197815])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-4)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-4)

def test_repeated_bonferonni_posthoc(self):
"""Testing Games Howell Post Hoc Repeated."""
rr, cc = np.tril_indices(4, k=-1)

result = bonferonni_posthoc(*self.dataset2, ttest_type='repeated')

# Regression Test
expected_p_value = np.array([0.228903, 0.551281, 0.716359,
0.843387, 0.310797, 0.545441])

expected_lower_ci = np.array([-0.299669, -0.292557, -0.202171,
-0.213828, -0.12932 , -0.207034])

np.testing.assert_allclose(result['P_value'][rr, cc],
expected_p_value, atol=1e-4)
np.testing.assert_allclose(result['Lower CI'][rr, cc],
expected_lower_ci, atol=1e-4)

if __name__ == "__main__":
unittest.main()
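
A quick sketch for running only the new tests from the repository root, assuming the test directory is importable as a package:
```
import unittest

# Discover the post-hoc tests under inferential/test and run them
suite = unittest.defaultTestLoader.discover("inferential/test", pattern="test_posthoc.py")
unittest.TextTestRunner(verbosity=2).run(suite)
```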
