Added pearsons correlation and inferential tests. (#14)

eribean · Aug 30, 2021 · ef903aa · ef903aa
1 parent 1d559fc
commit ef903aa
Show file tree

Hide file tree

Showing 4 changed files with 66 additions and 2 deletions.
diff --git a/inferential/__init__.py b/inferential/__init__.py
@@ -4,4 +4,5 @@
 from .twoway_anova import *
 from .linear_regression import *
 from .logistic_regression import *
-from .simple_mediation import *
+from .simple_mediation import *
+from .correlation import *
diff --git a/inferential/correlation.py b/inferential/correlation.py
@@ -0,0 +1,33 @@
+import numpy as np
+
+from scipy import stats as sp
+
+
+__all__ =  ['pearsons_correlation']
+
+
+def pearsons_correlation(raw_data):
+    """Computes Pearson's correlation matrix and its critical values.
+
+    Args:
+        raw_data:  Array-like data matrix [n_items, n_observations]
+
+    Returns:
+        dict: 'Correlation' holds the [n_items, n_items] matrix and
+              'R critical' the two-tailed critical |r| at .05, .01, .001
+
+    Notes:
+        The test uses n_observations - 2 degrees of freedom, so at least
+        3 observations are needed for meaningful critical values
+    """
+    # Convert up front so nested lists work the same as ndarrays
+    raw_data = np.asarray(raw_data)
+    correlation = np.corrcoef(raw_data)
+
+    # Two-tailed critical t per alpha, mapped to r = t / sqrt(t^2 + df)
+    deg_of_freedom = raw_data.shape[1] - 2
+    t_critical = sp.t.isf([.025, .005, .0005], deg_of_freedom)
+    r_critical = np.sqrt(t_critical**2 / (t_critical**2 + deg_of_freedom))
+
+    return {'Correlation': correlation,
+            'R critical': dict(zip(['.05', '.01', '.001'], r_critical))}
diff --git a/inferential/test/test_correlation.py b/inferential/test/test_correlation.py
@@ -0,0 +1,30 @@
+import unittest
+
+import numpy as np
+
+from RyStats.inferential import pearsons_correlation
+
+
+class TestCorrelation(unittest.TestCase):
+    """Unit tests for the pearsons_correlation function."""
+
+    def test_pearsons_correlation(self):
+        """Random normal data should exceed r critical near the .05 rate."""
+        n_items, n_observations = 100, 1000
+        generator = np.random.default_rng(34982750394857201981982375)
+        noise = generator.standard_normal((n_items, n_observations))
+
+        output = pearsons_correlation(noise)
+        magnitude = np.abs(output['Correlation'])
+        threshold = output['R critical']['.05']
+
+        # Subtract the n_items diagonal (self correlation) entries, then
+        # normalize by the number of off-diagonal entries
+        n_flagged = np.count_nonzero(magnitude > threshold) - n_items
+        n_pairs = n_items * (n_items - 1)
+        false_positive_rate = n_flagged / n_pairs
+
+        self.assertAlmostEqual(false_positive_rate, .05, delta=0.01)
+
+if __name__ == "__main__":
+    unittest.main()  # Run this module's tests when executed as a script
diff --git a/setup.py b/setup.py
@@ -16,7 +16,7 @@
                      'RyStats.dimensionality': convert_path('./dimensionality'),
                      'RyStats.plots': convert_path('./plots')
                      },
-        version="0.4.0",
+        version="0.4.1",
         license="MIT",
         description="Psychology Related Statistics in Python!",
         long_description=long_description.replace('<ins>','').replace('</ins>',''),