-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchi2tests.m
107 lines (94 loc) · 3.47 KB
/
chi2tests.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
function [P, chi2, table, df] = chi2tests(x1, x2, method)
% Chi-square tests of homogeneity and independence
% Computes the P-value for IxJ-table row/col independence.
% Program by Steinar Thorvaldsen, steinar.thorvaldsen@uit.no, Dec. 2004.
% Ref.: DeltaProt toolbox at http://services.cbu.uib.no/software/deltaprot/
% Last changes 22. Dec 2010.
% Requires Matlab 7.1 or newer, and Matlab Statistics toolbox.
% Input:
% X: data matrix (IxJ-table) of the observed frequency cells
% method: 'RC': Read-Cressie power divergence statistics (default), lambda= 2/3
% 'Pe': Standard Pearson chi2-distance, lambda= 1
% 'LL': Log Likelihood ratio distance, lambda= 0
% Output:
% P-value, chi2-value, degree of freedom (df)
%
% Use: [P, chi2, df] = chi2tests(x1, x2,,'Pe')
% The P-value is computed through approximation with chi-2 distribution
% under the null hypothesis for all methods.
% The 'RC'-method is sligtly better than the 'Pe'-method in small tables
% with unbalanced column margins
% Please, use the following reference:
% Thorvaldsen, S. , Fl?, T. and Willassen, N.P. (2010) DeltaProt: a software toolbox
% for comparative genomics. BMC Bioinformatics 2010, Vol 11:573.
% See http://www.biomedcentral.com/1471-2105/11/573
% Other reference:
% Rudas, T. (1986): A Monte Carlo Comparision of Small Sample Behaviour
% of The Pearson, the Likelihood Ratio and the Cressie-Read Statistics.
% J.Statist. Comput. Simul, vol 24, pp 107-120.
% Read, TRC and Cressie, NAC (1988): Goodness of Fit Statistics for
% Discrete Multivariate Data. Springer Verlag.
% Ewens, WJ and Grant, GR (2001): Statistical Methods in Bioinformatics.
% Springer Verlag.
if nargin < 3
method = 'RC'; %default value
else
method=upper(method);
switch method %validate the method parameter:
case {'RC' 'PE' 'LL'}
% these are ok
otherwise
error('Chi2Test:UnknownMethod', ...
'The ''method'' parameter value must be ''RC'', ''Pe'', or ''LL''.');
end %switch
end %if
% lambda - power for statistic:
if strcmp(method,'RC'),
lambda=2/3;
elseif strcmp(method, 'PE'),
lambda=1;
elseif strcmp(method, 'LL'),
lambda=0;
end
uid = unique([x1(:); x2(:)]);
X = zeros(2,length(uid));
for i=1:length(uid),
X(1,i) = length(find(x1==uid(i)));
X(2,i) = length(find(x2==uid(i)));
end
% X = zeros(length(uid),2);
% for i=1:length(uid),
% X(i,1) = length(find(x1==uid(i)));
% X(i,2) = length(find(x2==uid(i)));
% end
[I, J] = size(X);
if I < 2 | J < 2,
error('Matrix of observation must at least be of size 2x2');
return;
end
rs = sum(X')';
cs = sum(X);
Ns = sum(rs);
if Ns <= 0 % no counts found in table
P=NaN;
return;
end
Eo = (rs*cs)./Ns; %matrix of null means
% Make sure expected values are not too small
Emin=1.5;
if any(any(Eo < Emin))
%disp ('Note: Some expected values in chi2Test are small (<1.5)...');
end
if any(any(Eo <= 0))
chi2 = NaN;
elseif lambda == 0 % logL
i = find(X>0);
chi2 = 2 * sum(X(i).*(log(X(i)./Eo(i)))); % the test statistic
elseif lambda == 1 % Pearson chi2
chi2 = sum(sum((Eo-X).^2 ./ Eo)); % Pearson test statistic
else % Read-Cressie test statistic
chi2 = (2/(lambda*(lambda+1)))*sum(sum(X.*((X./Eo).^lambda -1)));
end
df = prod(size(X)-[1,1]); % degree of freedom
P = 1-chi2cdf(chi2,df);
table = X;