Skip to content

Commit

Permalink
Merge pull request #12 from nignatiadis/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
nignatiadis authored Jul 3, 2022
2 parents 0387776 + 4de5ea2 commit 7a58caf
Show file tree
Hide file tree
Showing 27 changed files with 926 additions and 136 deletions.
22 changes: 12 additions & 10 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Empirikos"
uuid = "cab608d6-c565-4ea1-96d6-ce5441ba21b0"
authors = ["Nikos Ignatiadis <nikos.ignatiadis01@gmail.com> and contributors"]
version = "0.4.5"
version = "0.4.6"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand All @@ -17,6 +17,7 @@ Optim = "429524aa-4258-5aef-a3af-852621145aeb"
ParameterJuMP = "774612a8-9878-5177-865a-ca53ae2495f9"
QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
RangeHelpers = "3a07dd3d-1c52-4395-8858-40c6328157db"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
Expand All @@ -25,20 +26,21 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"

[compat]
CSV = "0.8"
CSV = "0.8, 0.9, 0.10"
DataStructures = "0.17,0.18"
Distributions = "0.24.7, 0.25"
Intervals = "1.4"
JuMP = "0.21"
Intervals = "1.4, 1.5, 1.6"
JuMP = "^1"
KernelDensity = "0.6"
LinearFractional = "0.7.4"
MathOptInterface = "0.9"
Optim = "1.2, 1.3"
ParameterJuMP = "0.3"
LinearFractional = "^0.7.5"
MathOptInterface = "^1"
Optim = "1.6"
ParameterJuMP = "^0.4"
QuadGK = "2.0"
RecipesBase = "1.1"
RangeHelpers = "0.1.8"
RecipesBase = "1.2"
Reexport = "0.2, 1.0"
Setfield = "0.7"
Setfield = "0.8, 1"
StatsBase = "0.33"
UnPack = "1.0"
julia = "1.6"
Expand Down
7 changes: 7 additions & 0 deletions src/Empirikos.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ using Optim
using ParameterJuMP
using QuadGK
using Random
using RangeHelpers
using RecipesBase
using Setfield
using Statistics
Expand All @@ -50,6 +51,9 @@ include("samples/binomial.jl")
include("samples/normal.jl")
include("samples/poisson.jl")
include("samples/truncatedpoisson.jl")
include("samples/noncentralhypergeometric.jl")

include("samples/foldednormal.jl")
include("example_priors.jl")
include("confidence_interval_tools.jl")
include("flocalization_intervals.jl")
Expand All @@ -62,6 +66,9 @@ include("datasets/Prostate/Prostate.jl")
include("datasets/Neighborhoods/neighborhoods.jl")
include("datasets/Butterfly/Butterfly.jl")
include("datasets/Surgery/Surgery.jl")
include("datasets/CollinsLangman/CollinsLangman.jl")
include("datasets/CressieSeheult/CressieSeheult.jl")
include("datasets/Bichsel/Bichsel.jl")



Expand Down
54 changes: 37 additions & 17 deletions src/amari.jl
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ Base.@kwdef struct AMARI{N, G, M, EB}
flocalization::N
solver
discretizer = nothing#DataBasedDefault()
plugin_G = KolmogorovSmirnovMinimumDistance(convexclass, solver)
plugin_G = NPMLE(convexclass, solver)
data_split = :none
delta_grid = 0.2:0.5:6.7
delta_objective = RMSE()
Expand Down Expand Up @@ -218,7 +218,7 @@ function initialize_modulus_model(method::AMARI, ::Type{ModulusModelWithF}, targ
bound_delta=bound_delta, target=target)
end

function modulus_cholesky_factor(convexclass::AbstractSimplexPriorClass, plugin_G, discr,
function modulus_cholesky_factor(convexclass::AbstractMixturePriorClass, plugin_G, discr,
eb_samples::HeteroskedasticSamples)
K = nparams(convexclass)
chr = cholesky(zeros(K, K) + I)
Expand Down Expand Up @@ -284,9 +284,17 @@ function set_target!(modulus_model::AbstractModulusModel, target::Empirikos.Line
end

function default_support_discretizer(Zs::AbstractVector{<:AbstractNormalSample})
_low,_up = quantile(response.(Zs), (0.02, 0.98))
_low,_up = quantile(response.(Zs), (0.005, 0.995))
_step = mean( std.(Zs))/100
interval_discretizer(range(_low; stop=_up, step=_step))
interval_discretizer(RangeHelpers.range(_low; stop=above(_up), step=_step))
end

"""
    default_support_discretizer(Zs::AbstractVector{<:FoldedNormalSample})

Construct the default interval discretizer for folded-normal samples: a grid of
left-closed intervals starting at zero, extending past the empirical 0.995
quantile of the responses (stop chosen via `RangeHelpers.above` so the quantile
is covered), with step size equal to the mean sample standard deviation / 100.
The last interval is unbounded on the right.
"""
function default_support_discretizer(Zs::AbstractVector{<:FoldedNormalSample})
    responses = response.(Zs)
    upper = quantile(responses, 0.995)
    grid_step = mean(std.(Zs)) / 100
    grid = RangeHelpers.range(start = zero(upper), stop = above(upper), step = grid_step)
    interval_discretizer(grid; closed = :left, unbounded = :right)
end


Expand Down Expand Up @@ -482,25 +490,37 @@ function fit_initialized!(method::AMARI, target, Zs; kwargs...)
end


function confint(Q::SteinMinimaxEstimator, target, Zs; level=0.95)
function confint(Q::SteinMinimaxEstimator, target, Zs; level=0.95, tail=:both)
target == Q.modulus_model.target ||
error("Target has changed")
α = 1- level
_bias = Q.max_bias
_Qs = Q.Q.(Zs)
_wts = StatsBase.weights(Zs)
_se = std(Q.Q.(Zs), _wts; corrected=true)/sqrt(nobs(Zs))
point_estimate = mean(Q.Q.(Zs), _wts)
halfwidth = gaussian_ci(_se; maxbias=_bias, α=α)
BiasVarianceConfidenceInterval(estimate = point_estimate,
_se = std(_Qs, _wts; corrected=true)/sqrt(nobs(Zs))
point_estimate = mean(_Qs, _wts)
BiasVarianceConfidenceInterval(;estimate = point_estimate,
maxbias = _bias,
se = _se,
α = α, method = nothing, target = target)
tail = tail,
α = α,
target = target)
end

function confint(method::AMARI, target::Empirikos.LinearEBayesTarget, Zs; initialize=true, kwargs...)
function confint(method::AMARI, target::Empirikos.LinearEBayesTarget, Zs; initialize=true, constrain_outer=true, kwargs...)
_fit = StatsBase.fit(method, target, Zs; initialize=initialize)
confint(_fit, target, Zs; kwargs...)
amari_ci = confint(_fit, target, Zs; kwargs...)
if constrain_outer
floc_worst_case = FLocalizationInterval(flocalization = _fit.method.flocalization,
convexclass = method.convexclass,
solver= method.solver)

outer_ci = confint(floc_worst_case, target)
amari_ci = @set amari_ci.lower = max(amari_ci.lower, outer_ci.lower)
amari_ci = @set amari_ci.upper = min(amari_ci.upper, outer_ci.upper)
# TODO: switch to LowerUpperConfidenceInterval in this case
end
amari_ci
end

function StatsBase.fit(method::AMARI, target, Zs; initialize=true, kwargs...)
Expand All @@ -526,15 +546,15 @@ function Base.broadcasted(::typeof(confint), amari::AMARI,
end

function Base.broadcasted_kwsyntax(::typeof(confint), amari::AMARI,
targets::AbstractArray{<:Empirikos.EBayesTarget}, Zs; level=0.95)
targets::AbstractArray{<:Empirikos.EBayesTarget}, Zs; level=0.95, tail=:both)

init_target = isa(targets[1], LinearEBayesTarget) ? targets[1] : denominator(targets[1])
method = initialize_method(amari, init_target, Zs)

_ci = confint(method, targets[1], Zs; initialize=false, level=level)
_ci = confint(method, targets[1], Zs; initialize=false, level=level, tail=tail)
confint_vec = fill(_ci, axes(targets))
for (index, target) in enumerate(targets[2:end])
confint_vec[index+1] = confint(method, target, Zs; initialize=false, level=level)
confint_vec[index+1] = confint(method, target, Zs; initialize=false, level=level, tail=tail)
end
confint_vec
end
Expand All @@ -552,7 +572,7 @@ Form a confidence interval for the [`Empirikos.EBayesTarget`](@ref) `target` wit
`level` based on the samples `Zs` using the [`AMARI`](@ref) `method`.
"""
function confint(method::AMARI, target::Empirikos.AbstractPosteriorTarget, Zs;
initialize=true, level=0.95, kwargs...)
initialize=true, level=0.95, tail=:both, kwargs...)
if initialize
init_target = Empirikos.PosteriorTargetNullHypothesis(target, 0.0)
method = initialize_method(method, init_target, Zs; kwargs...)
Expand Down Expand Up @@ -598,7 +618,7 @@ function confint(method::AMARI, target::Empirikos.AbstractPosteriorTarget, Zs;


λs = range(0, stop=1, length=10_000)
all_cis = confint.(Ref(bisection_pair), λs; α = α)
all_cis = confint.(Ref(bisection_pair), λs; α = α, tail = tail)
zero_in_ci = first.(all_cis) .<= 0.0 .<= last.(all_cis)

idx_lhs = findfirst(zero_in_ci)
Expand Down
5 changes: 5 additions & 0 deletions src/compound.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ function likelihood_distribution(Z::CompoundSample, μ)
MixtureModel(likelihood_distribution.(Z.vec, μ), Z.probs)
end

# TODO: remove this by improved type hierarchy
# Marginalizing a compound sample over a point-mass (Dirac) prior reduces to
# the likelihood distribution evaluated at the prior's single support point.
marginalize(Z::CompoundSample, G::Dirac) = likelihood_distribution(Z, G.value)

function marginalize(Z::CompoundSample, prior::Distribution)
n = length(Z.vec)
MixtureModel(marginalize.(Z.vec, Ref(prior)), Z.probs)
Expand Down
68 changes: 55 additions & 13 deletions src/confidence_interval_tools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,31 +57,70 @@ end
end

function gaussian_ci(se; maxbias=0.0, α=0.05)
maxbias = abs(maxbias) # should throw an error?
if iszero(se)
return maxbias
end
maxbias = abs(maxbias)
rel_bias = maxbias/se
if abs(rel_bias) > 6
if abs(rel_bias) > 7
pm = quantile(Normal(), 1-α) + abs(rel_bias)
else
pm = sqrt(quantile(NoncentralChisq(1, abs2(rel_bias)), 1-α))
end
se*pm
end

Base.@kwdef struct BiasVarianceConfidenceInterval <: ConfidenceInterval
target = nothing
method = nothing
α::Float64 = 0.05
struct BiasVarianceConfidenceInterval <: ConfidenceInterval
target
method
α::Float64
tail::Symbol
estimate::Float64
se::Float64
maxbias::Float64 = 0.0
halflength::Float64 = gaussian_ci(se; maxbias=maxbias, α=α)
lower::Float64 = estimate - halflength
upper::Float64 = estimate + halflength
maxbias::Float64
halflength::Float64
lower::Float64
upper::Float64
end

"""
    BiasVarianceConfidenceInterval(; target=nothing, method=nothing, α=0.05,
                                   tail=:both, estimate, se, maxbias=0.0)

Keyword constructor for `BiasVarianceConfidenceInterval`. Derives the interval
endpoints from the point `estimate`, its standard error `se`, and the worst-case
bias `maxbias`, according to `tail`:

- `:both`  — symmetric interval with half-length `gaussian_ci(se; maxbias, α)`.
- `:right` — one-sided lower bound (`upper == Inf`); half-length is the
  `1-α` standard-normal quantile times `se` plus `abs(maxbias)`.
- `:left`  — one-sided upper bound (`lower == -Inf`); same half-length as `:right`.

# Throws
- `ArgumentError` if `tail` is not one of `:both`, `:right`, `:left`.
"""
function BiasVarianceConfidenceInterval(; target = nothing,
                                          method = nothing,
                                          α = 0.05,
                                          tail = :both,
                                          estimate,
                                          se,
                                          maxbias = 0.0)

    if tail === :both
        halflength = gaussian_ci(se; maxbias = maxbias, α = α)
        lower = estimate - halflength
        upper = estimate + halflength
    elseif tail === :right
        # One-sided width: normal quantile plus worst-case bias.
        halflength = se * quantile(Normal(), 1 - α) + abs(maxbias)
        lower = estimate - halflength
        upper = Inf
    elseif tail === :left   # was `==`; use `===` consistently with the branches above
        halflength = se * quantile(Normal(), 1 - α) + abs(maxbias)
        lower = -Inf
        upper = estimate + halflength
    else
        throw(ArgumentError("tail=$(tail) is not a valid keyword argument"))
    end

    BiasVarianceConfidenceInterval(target,
                                   method,
                                   α,
                                   tail,
                                   estimate,
                                   se,
                                   maxbias,
                                   halflength,
                                   lower,
                                   upper)
end


function Base.show(io::IO, ci::BiasVarianceConfidenceInterval)
print(io, "lower = ", round(ci.lower,sigdigits=4))
print(io, ", upper = ", round(ci.upper,sigdigits=4))
Expand All @@ -107,10 +146,13 @@ end

Base.broadcastable(pair::BisectionPair) = Ref(pair)

function confint(pair::BisectionPair, λ ; α=0.05)
function confint(pair::BisectionPair, λ ; α=0.05, tail=:both)
_se = sqrt( abs2(1-λ)*pair.var1 + abs2(λ)*pair.var2 + 2*λ*(1-λ)*pair.cov)
_maxbias = (1-λ)*pair.max_bias1 + λ*pair.max_bias2
_estimate = (1-λ)*pair.estimate1 + λ*pair.estimate2
bw = gaussian_ci(_se; maxbias=_maxbias, α=α)
_estimate -bw , _estimate +bw

bw = BiasVarianceConfidenceInterval(;
α=α, tail=tail, maxbias=_maxbias, se=_se, estimate = _estimate)

bw.lower, bw.upper
end
Loading

2 comments on commit 7a58caf

@nignatiadis
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/63581

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.4.6 -m "<description of version>" 7a58cafafd9d26372b07b63aae58459a0cd476d6
git push origin v0.4.6

Please sign in to comment.