Skip to content

Commit

Permalink
Use StatsAPI.pvalue + StatsAPI.HypothesisTest (#297)
Browse files Browse the repository at this point in the history
* Use `StatsAPI.pvalue` + `StatsAPI.HypothesisTest`

* Apply suggestions from code review

Co-authored-by: Alex Arslan <ararslan@comcast.net>

* Export `nobs` and `dof`

* Apply suggestions

---------

Co-authored-by: Alex Arslan <ararslan@comcast.net>
  • Loading branch information
devmotion and ararslan authored May 31, 2023
1 parent 9bcdfbd commit 932eaac
Show file tree
Hide file tree
Showing 30 changed files with 83 additions and 152 deletions.
7 changes: 5 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
name = "HypothesisTests"
uuid = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
version = "0.10.13"
version = "0.11.0"

[deps]
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Rmath = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
Combinatorics = "1"
Distributions = "0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25"
Distributions = "0.25.90"
Rmath = "0.5, 0.6, 0.7"
Roots = "0.7, 0.8, 1.0, 2"
StatsAPI = "1.6"
StatsBase = "0.27, 0.28, 0.29, 0.30, 0.31, 0.32, 0.33, 0.34"
julia = "1.3"

Expand Down
69 changes: 6 additions & 63 deletions src/HypothesisTests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,73 +28,17 @@ using Statistics, Random, LinearAlgebra
using Distributions, Roots, StatsBase
using Combinatorics: combinations, permutations
using Rmath: pwilcox, psignrank
using Printf: @printf

import StatsBase.confint
import StatsAPI
using StatsAPI: HypothesisTest, confint, pvalue

export testname, pvalue, confint
abstract type HypothesisTest end
export testname, pvalue, confint, dof, nobs

check_same_length(x::AbstractVector, y::AbstractVector) = if length(x) != length(y)
throw(DimensionMismatch("Vectors must be the same length"))
end

"""
confint(test::HypothesisTest; level = 0.95, tail = :both)
Compute a confidence interval C with coverage `level`.
If `tail` is `:both` (default), then a two-sided confidence interval is returned. If `tail`
is `:left` or `:right`, then a one-sided confidence interval is returned.
!!! note
Most of the implemented confidence intervals are *strongly consistent*, that is, the
confidence interval with coverage `level` does not contain the test statistic under
``h_0`` if and only if the corresponding test rejects the null hypothesis
``h_0: θ = θ_0``:
```math
C (x, level) = \\{θ : p_θ (x) > 1 - level\\},
```
where ``p_θ`` is the [`pvalue`](@ref) of the corresponding test.
"""
function confint end

"""
pvalue(test::HypothesisTest; tail = :both)
Compute the p-value for a given significance test.
If `tail` is `:both` (default), then the p-value for the two-sided test is returned. If
`tail` is `:left` or `:right`, then a one-sided test is performed.
"""
function pvalue end

# Basic function for finding a p-value given a distribution and tail
function pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail=:both)
check_tail(tail)

if tail == :both
p = 2 * min(cdf(dist, x), ccdf(dist, x))
min(p, oneunit(p)) # if P(X = x) > 0, then possibly p > 1
elseif tail == :left
cdf(dist, x)
else # tail == :right
ccdf(dist, x)
end
end

function pvalue(dist::DiscreteUnivariateDistribution, x::Number; tail=:both)
check_tail(tail)

if tail == :both
p = 2 * min(ccdf(dist, x-1), cdf(dist, x))
min(p, oneunit(p)) # if P(X = x) > 0, then possibly p > 1
elseif tail == :left
cdf(dist, x)
else # tail == :right
ccdf(dist, x-1)
end
end

function check_level(level::Float64)
if level >= 1 || level <= 0.5
throw(ArgumentError("coverage level $level not in range (0.5, 1)"))
Expand All @@ -114,7 +58,7 @@ function Base.show(_io::IO, test::T) where T<:HypothesisTest
println(io, repeat("-", length(testname(test))))

# population details
has_ci = applicable(StatsBase.confint, test)
has_ci = applicable(confint, test)
(param_name, param_under_h0, param_estimate) = population_param_of_interest(test)
println(io, "Population details:")
println(io, " parameter of interest: $param_name")
Expand All @@ -126,7 +70,7 @@ function Base.show(_io::IO, test::T) where T<:HypothesisTest
println(io)

if has_ci
ci = map(x -> round.(x; sigdigits=4, base=10), StatsBase.confint(test))
ci = map(x -> round.(x; sigdigits=4, base=10), confint(test))
print(io, " 95% confidence interval: ")
show(io, ci)
println(io)
Expand Down Expand Up @@ -176,7 +120,6 @@ function show_params(io::IO, test::T, ident="") where T<:HypothesisTest
end
end

include("deprecated.jl")
include("common.jl")

include("binomial.jl")
Expand Down
4 changes: 2 additions & 2 deletions src/anderson_darling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ function show_params(io::IO, x::OneSampleADTest, ident = "")
end

### G. and J. Marsaglia, "Evaluating the Anderson-Darling Distribution", Journal of Statistical Software, 2004
function pvalue(t::OneSampleADTest)
function StatsAPI.pvalue(t::OneSampleADTest)
g1(x) = sqrt(x)*(1.0-x)*(49.0x-102.0)
g2(x) = -0.00022633 + (6.54034 - (14.6538 - (14.458 - (8.259 - 1.91864x)x)x)x)x
g3(x) = -130.2137 + (745.2337 - (1705.091 - (1950.646 - (1116.360 - 255.7844x)x)x)x)x
Expand Down Expand Up @@ -238,7 +238,7 @@ function pvalueasym(x::KSampleADTest)
return exp(lp0)/(1 + exp(lp0))
end

pvalue(x::KSampleADTest) = x.nsim == 0 ? pvalueasym(x) : pvaluesim(x)
StatsAPI.pvalue(x::KSampleADTest) = x.nsim == 0 ? pvalueasym(x) : pvaluesim(x)

function adkvals(Z⁺, N, samples)
k = length(samples)
Expand Down
2 changes: 1 addition & 1 deletion src/augmented_dickey_fuller.jl
Original file line number Diff line number Diff line change
Expand Up @@ -217,4 +217,4 @@ function show_params(io::IO, x::ADFTest, ident)
println(io)
end

pvalue(x::ADFTest) = HypothesisTests.pvalue(Normal(0, 1), adf_pv_aux(x.stat, x.deterministic); tail=:left)
StatsAPI.pvalue(x::ADFTest) = pvalue(Normal(0, 1), adf_pv_aux(x.stat, x.deterministic); tail=:left)
6 changes: 3 additions & 3 deletions src/bartlett.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ function BartlettTest(X::AbstractMatrix, Y::AbstractMatrix)
return BartlettTest(L′, p, nx, ny)
end

StatsBase.nobs(B::BartlettTest) = (B.nx, B.ny)
StatsBase.dof(B::BartlettTest) = div(B.p * (B.p + 1), 2)
StatsAPI.nobs(B::BartlettTest) = (B.nx, B.ny)
StatsAPI.dof(B::BartlettTest) = div(B.p * (B.p + 1), 2)

testname(::BartlettTest) = "Bartlett's Test for Equality of Covariance Matrices"
default_tail(::BartlettTest) = :right
pvalue(B::BartlettTest; tail=:right) = pvalue(Chisq(dof(B)), B.L′, tail=tail)
StatsAPI.pvalue(B::BartlettTest; tail=:right) = pvalue(Chisq(dof(B)), B.L′, tail=tail)

function show_params(io::IO, B::BartlettTest, indent="")
println(io, indent, "number of observations: ", nobs(B))
Expand Down
12 changes: 6 additions & 6 deletions src/binomial.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ function show_params(io::IO, x::BinomialTest, ident="")
println(io, ident, "number of successes: $(x.x)")
end

pvalue(x::BinomialTest; tail=:both) = pvalue(Binomial(x.n, x.p), x.x; tail=tail)
StatsAPI.pvalue(x::BinomialTest; tail=:both) = pvalue(Binomial(x.n, x.p), x.x; tail=tail)

# Confidence interval
"""
Expand Down Expand Up @@ -106,13 +106,13 @@ of the following methods. Possible values for `method` are:
* [Binomial confidence interval on Wikipedia](https://en.wikipedia.org/wiki/
Binomial_proportion_confidence_interval)
"""
function StatsBase.confint(x::BinomialTest; level::Float64=0.95, tail=:both, method=:clopper_pearson)
function StatsAPI.confint(x::BinomialTest; level::Float64=0.95, tail=:both, method=:clopper_pearson)
check_level(level)

if tail == :left
(0.0, StatsBase.confint(x, level=1-(1-level)*2, method=method)[2])
(0.0, confint(x, level=1-(1-level)*2, method=method)[2])
elseif tail == :right
(StatsBase.confint(x, level=1-(1-level)*2, method=method)[1], 1.0)
(confint(x, level=1-(1-level)*2, method=method)[1], 1.0)
elseif tail == :both
if method == :clopper_pearson
ci_clopper_pearson(x, 1-level)
Expand Down Expand Up @@ -231,9 +231,9 @@ function show_params(io::IO, x::SignTest, ident="")
println(io, ident, text2, x.x)
end

pvalue(x::SignTest; tail=:both) = pvalue(Binomial(x.n, 0.5), x.x; tail=tail)
StatsAPI.pvalue(x::SignTest; tail=:both) = pvalue(Binomial(x.n, 0.5), x.x; tail=tail)

function StatsBase.confint(x::SignTest; level::Float64=0.95, tail=:both)
function StatsAPI.confint(x::SignTest; level::Float64=0.95, tail=:both)
check_level(level)

if tail == :left
Expand Down
4 changes: 2 additions & 2 deletions src/box_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ function show_params(io::IO, x::BoxPierceTest, ident)
println(io, ident, "Q statistic: ", x.Q)
end

pvalue(x::BoxPierceTest) = pvalue(Chisq(x.lag-x.dof), x.Q; tail=:right)
StatsAPI.pvalue(x::BoxPierceTest) = pvalue(Chisq(x.lag-x.dof), x.Q; tail=:right)

#Ljung-Box test

Expand Down Expand Up @@ -120,4 +120,4 @@ function show_params(io::IO, x::LjungBoxTest, ident)
println(io, ident, "Q statistic: ", x.Q)
end

pvalue(x::LjungBoxTest) = pvalue(Chisq(x.lag-x.dof), x.Q; tail=:right)
StatsAPI.pvalue(x::LjungBoxTest) = pvalue(Chisq(x.lag-x.dof), x.Q; tail=:right)
2 changes: 1 addition & 1 deletion src/breusch_godfrey.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ function show_params(io::IO, x::BreuschGodfreyTest, ident)
println(io, ident, "T*R^2 statistic: ", x.BG)
end

pvalue(x::BreuschGodfreyTest) = pvalue(Chisq(x.lag), x.BG; tail=:right)
StatsAPI.pvalue(x::BreuschGodfreyTest) = pvalue(Chisq(x.lag), x.BG; tail=:right)
6 changes: 3 additions & 3 deletions src/circular.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ function show_params(io::IO, x::RayleighTest, ident="")
println(io, ident, "test statistic: $(x.Rbar^2 * x.n)")
end

function pvalue(x::RayleighTest)
function StatsAPI.pvalue(x::RayleighTest)
Z = x.Rbar^2 * x.n
x.n > 1e6 ? exp(-Z) :
exp(-Z)*(1+(2*Z-Z^2)/(4*x.n)-(24*Z - 132*Z^2 + 76*Z^3 - 9*Z^4)/(288*x.n^2))
Expand Down Expand Up @@ -135,7 +135,7 @@ function tlinear_Z(x::FisherTLinearAssociation)
end

# p-values
function pvalue(x::FisherTLinearAssociation; tail=:both)
function StatsAPI.pvalue(x::FisherTLinearAssociation; tail=:both)
n = length(x.theta)
if n == 0
return NaN
Expand Down Expand Up @@ -218,7 +218,7 @@ function show_params(io::IO, x::JammalamadakaCircularCorrelation, ident="")
println(io, ident, "test statistic: $(x.Z)")
end

pvalue(x::JammalamadakaCircularCorrelation; tail=:both) = pvalue(Normal(), x.Z; tail=tail)
StatsAPI.pvalue(x::JammalamadakaCircularCorrelation; tail=:both) = pvalue(Normal(), x.Z; tail=tail)

## GENERAL

Expand Down
8 changes: 4 additions & 4 deletions src/correlation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ function population_param_of_interest(p::CorrelationTest)
(param, zero(p.r), p.r)
end

StatsBase.nobs(p::CorrelationTest) = p.n
StatsBase.dof(p::CorrelationTest) = p.n - 2 - p.k
StatsAPI.nobs(p::CorrelationTest) = p.n
StatsAPI.dof(p::CorrelationTest) = p.n - 2 - p.k

function StatsBase.confint(test::CorrelationTest{T}, level::Float64=0.95) where T
function StatsAPI.confint(test::CorrelationTest{T}, level::Float64=0.95) where T
dof(test) > 1 || return (-one(T), one(T)) # Otherwise we can get NaNs
q = quantile(Normal(), 1 - (1-level) / 2)
fisher = atanh(test.r)
Expand All @@ -81,7 +81,7 @@ function StatsBase.confint(test::CorrelationTest{T}, level::Float64=0.95) where
end

default_tail(::CorrelationTest) = :both
pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail)
StatsAPI.pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail)

function show_params(io::IO, test::CorrelationTest, indent="")
println(io, indent, "number of observations: ", nobs(test))
Expand Down
12 changes: 0 additions & 12 deletions src/deprecated.jl

This file was deleted.

2 changes: 1 addition & 1 deletion src/durbin_watson.jl
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ function pan_algorithm(a::AbstractArray, x::Float64, m::Int, n::Int)

end

function pvalue(x::DurbinWatsonTest; tail=:both)
function StatsAPI.pvalue(x::DurbinWatsonTest; tail=:both)

exact_problem_flag = 0
if (x.p_compute == :ndep && x.n <= 100) || x.p_compute == :exact
Expand Down
2 changes: 1 addition & 1 deletion src/f.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ function show_params(io::IO, x::VarianceFTest, ident)
println(io, ident, "degrees of freedom: [$(x.df_x), $(x.df_y)]")
end

function pvalue(x::VarianceFTest; tail=:both)
function StatsAPI.pvalue(x::VarianceFTest; tail=:both)
dist = FDist(x.df_x, x.df_y)
if tail == :both
return 1 - 2*abs(cdf(dist, x.F) - 0.5)
Expand Down
8 changes: 4 additions & 4 deletions src/fisher.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ For `tail = :both`, possible values for `method` are:
Blaker’s exact tests". Biostatistics, Volume 11, Issue 2, 1 April 2010, Pages 373–374,
[link](https://doi.org/10.1093/biostatistics/kxp050)
"""
function pvalue(x::FisherExactTest; tail=:both, method=:central)
function StatsAPI.pvalue(x::FisherExactTest; tail=:both, method=:central)
if tail == :both && method != :central
if method == :minlike
p = pvalue_both_minlike(x)
Expand Down Expand Up @@ -179,7 +179,7 @@ Fisher's non-central hypergeometric distribution. For `tail = :both`, the only
Blaker’s exact tests". Biostatistics, Volume 11, Issue 2, 1 April 2010, Pages 373–374,
[link](https://doi.org/10.1093/biostatistics/kxp050)
"""
function StatsBase.confint(x::FisherExactTest; level::Float64=0.95, tail=:both, method=:central)
function StatsAPI.confint(x::FisherExactTest; level::Float64=0.95, tail=:both, method=:central)
check_level(level)
if x.a == x.c == 0 || x.b == x.d == 0
return (0.0, Inf)
Expand All @@ -203,8 +203,8 @@ function StatsBase.confint(x::FisherExactTest; level::Float64=0.95, tail=:both,
end
elseif tail == :both
if method == :central
(StatsBase.confint(x, level=1-(1-level)/2, tail=:right)[1],
StatsBase.confint(x, level=1-(1-level)/2, tail=:left)[2])
(confint(x, level=1-(1-level)/2, tail=:right)[1],
confint(x, level=1-(1-level)/2, tail=:left)[2])
else
throw(ArgumentError("method=$(method) is not implemented yet"))
end
Expand Down
14 changes: 7 additions & 7 deletions src/hotelling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export OneSampleHotellingT2Test, EqualCovHotellingT2Test, UnequalCovHotellingT2T
abstract type HotellingT2TestTest <: HypothesisTest end

default_tail(::HotellingT2TestTest) = :right
pvalue(T::HotellingT2TestTest; tail=:right) = pvalue(FDist(dof(T)...), T.F, tail=tail)
StatsAPI.pvalue(T::HotellingT2TestTest; tail=:right) = pvalue(FDist(dof(T)...), T.F, tail=tail)

function show_params(io::IO, T::HotellingT2TestTest, indent="")
println(io, indent, "number of observations: ", nobs(T))
Expand Down Expand Up @@ -42,8 +42,8 @@ struct OneSampleHotellingT2Test <: HotellingT2TestTest
S::Matrix
end

StatsBase.nobs(T::OneSampleHotellingT2Test) = T.n
StatsBase.dof(T::OneSampleHotellingT2Test) = (T.p, T.n - T.p)
StatsAPI.nobs(T::OneSampleHotellingT2Test) = T.n
StatsAPI.dof(T::OneSampleHotellingT2Test) = (T.p, T.n - T.p)

"""
OneSampleHotellingT2Test(X::AbstractMatrix, μ₀=<zero vector>)
Expand Down Expand Up @@ -112,8 +112,8 @@ function EqualCovHotellingT2Test(X::AbstractMatrix, Y::AbstractMatrix)
return EqualCovHotellingT2Test(T², F, nx, ny, p, Δ, S)
end

StatsBase.nobs(T::EqualCovHotellingT2Test) = (T.nx, T.ny)
StatsBase.dof(T::EqualCovHotellingT2Test) = (T.p, T.nx + T.ny - T.p - 1)
StatsAPI.nobs(T::EqualCovHotellingT2Test) = (T.nx, T.ny)
StatsAPI.dof(T::EqualCovHotellingT2Test) = (T.p, T.nx + T.ny - T.p - 1)

testname(::EqualCovHotellingT2Test) =
"Two sample Hotelling's T² test (equal covariance matrices)"
Expand Down Expand Up @@ -156,8 +156,8 @@ function UnequalCovHotellingT2Test(X::AbstractMatrix, Y::AbstractMatrix)
return UnequalCovHotellingT2Test(T², F, nx, ny, p, ν, Δ, ST)
end

StatsBase.nobs(T::UnequalCovHotellingT2Test) = (T.nx, T.ny)
StatsBase.dof(T::UnequalCovHotellingT2Test) = (T.p, T.ν)
StatsAPI.nobs(T::UnequalCovHotellingT2Test) = (T.nx, T.ny)
StatsAPI.dof(T::UnequalCovHotellingT2Test) = (T.p, T.ν)

testname(::UnequalCovHotellingT2Test) =
"Two sample Hotelling's T² test (unequal covariance matrices)"
Expand Down
2 changes: 1 addition & 1 deletion src/jarque_bera.jl
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,4 @@ function show_params(io::IO, x::JarqueBeraTest, ident)
println(io, ident, "JB statistic: ", x.JB)
end

pvalue(x::JarqueBeraTest) = pvalue(Chisq(2), x.JB; tail=:right)
StatsAPI.pvalue(x::JarqueBeraTest) = pvalue(Chisq(2), x.JB; tail=:right)
Loading

2 comments on commit 932eaac

@ararslan
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/84635

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.11.0 -m "<description of version>" 932eaac50386b911989c98360b2fb6ac9d91629c
git push origin v0.11.0

Please sign in to comment.