Skip to content

Commit

Permalink
empirical.jl and hist.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
nalimilan committed Sep 25, 2021
1 parent 3f29a9c commit 850d3e6
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 56 deletions.
3 changes: 2 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ makedocs(
"scalarstats.md",
"cov.md",
"robust.md",
"ranking.md"]
"ranking.md",
"empirical.md"]
)

deploydocs(
Expand Down
30 changes: 30 additions & 0 deletions docs/src/empirical.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Empirical Estimation of Distributions

## Histograms

The `Histogram` type represents data that has been tabulated into intervals
(known as *bins*) along the real line or, in higher dimensions, over the
corresponding real space (for example, the real plane in two dimensions).

Histograms can be fitted to data using the `fit` method.

```@docs
fit(::Type{Histogram}, args...; kwargs...)
```

Additional methods:
```@docs
merge!
merge
midpoints
norm
normalize
normalize!
zero
```

## Empirical Cumulative Distribution Function

```@docs
ecdf
```
3 changes: 2 additions & 1 deletion docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Statistics can be weighted, and several weights types are distinguished to apply
corrections where necessary.

```@contents
Pages = ["weights.md", "scalarstats.md", "cov.md", "robust.md", "ranking.jl"]
Pages = ["weights.md", "scalarstats.md", "cov.md", "robust.md", "ranking.md",
"empirical.md"]
Depth = 2
```
10 changes: 8 additions & 2 deletions src/Statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,12 @@ export std, stdm, var, varm, mean!, mean,
trim, trim!, trimvar, winsor, winsor!,
# ranking.jl
ordinalrank, competerank, denserank, tiedrank,
# rankcorr
corkendall, corspearman
# rankcorr.jl
corkendall, corspearman,
# empirical.jl
ecdf, ECDF,
# hist.jl
fit, AbstractHistogram, Histogram, midpoints, norm, normalize, normalize!

include("common.jl")
include("weights.jl")
Expand All @@ -50,6 +54,8 @@ include("signalcorr.jl")
include("robust.jl")
include("ranking.jl")
include("rankcorr.jl")
include("empirical.jl")
include("hist.jl")

##### mean #####

Expand Down
6 changes: 3 additions & 3 deletions src/empirical.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ function is inside the interval ``(0,1)``; the function is defined for the whole
"""
ecdf(X::RealVector{T}) where T<:Real = ECDF(sort(X))

minimum(ecdf::ECDF) = first(ecdf.sorted_values)
Base.minimum(ecdf::ECDF) = first(ecdf.sorted_values)

maximum(ecdf::ECDF) = last(ecdf.sorted_values)
Base.maximum(ecdf::ECDF) = last(ecdf.sorted_values)

extrema(ecdf::ECDF) = (minimum(ecdf), maximum(ecdf))
Base.extrema(ecdf::ECDF) = (minimum(ecdf), maximum(ecdf))
4 changes: 2 additions & 2 deletions src/hist.jl
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ function histrange(lo::F, hi::F, n::Integer, closed::Symbol=:left) where F
len += one(F)
end
end
Base.floatrange(start,step,len,divisor)
Base.floatrange(start,step,Int(len),divisor)
end

histrange(vs::NTuple{N,AbstractVector},nbins::NTuple{N,Integer},closed::Symbol) where {N} =
Expand Down Expand Up @@ -397,7 +397,7 @@ arrays appropriately. See description of `normalize` for details. Returns `h`.
if mode == :pdf || mode == :density
# Divide weights by bin volume, for :pdf also divide by sum of weights
SumT = norm_type(h)
vs_0 = (mode == :pdf) ? sum(SumT(x) for x in weights) : one(SumT)
vs_0 = (mode == :pdf) ? sum(SumT, weights) : one(SumT)
@inbounds @nloops $N i weights d->(vs_{$N-d+1} = vs_{$N-d} * _edge_binvolume(SumT, edges[d], i_d)) begin
(@nref $N weights i) /= $(Symbol("vs_$N"))
for A in aux_weights
Expand Down
2 changes: 1 addition & 1 deletion test/empirical.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using StatsBase
using Statistics
using Test

@testset "ECDF" begin
Expand Down
90 changes: 45 additions & 45 deletions test/hist.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using StatsBase
using Statistics
using LinearAlgebra, Random, Test

@testset "StatsBase.Histogram" begin
@testset "Histogram" begin


@testset "Histogram binindex and binvolume" begin
Expand All @@ -14,15 +14,15 @@ using LinearAlgebra, Random, Test

@test h1 == Histogram(edg1, :left, false)

@test @inferred StatsBase.binindex(h1, -0.5) == 4
@test @inferred StatsBase.binindex(h2, (1.5, 2)) == (8, 3)
@test @inferred Statistics.binindex(h1, -0.5) == 4
@test @inferred Statistics.binindex(h2, (1.5, 2)) == (8, 3)

@test [StatsBase.binvolume(h1, i) for i in axes(h1.weights, 1)] ≈ diff(edg1)
@test [StatsBase.binvolume(h2, (i,j)) for i in axes(h2.weights, 1), j in axes(h2.weights, 2)] ≈ diff(edg1) * diff(edg2)'
@test [Statistics.binvolume(h1, i) for i in axes(h1.weights, 1)] ≈ diff(edg1)
@test [Statistics.binvolume(h2, (i,j)) for i in axes(h2.weights, 1), j in axes(h2.weights, 2)] ≈ diff(edg1) * diff(edg2)'

@test typeof(@inferred(StatsBase.binvolume(h2, (1,1)))) == Float64
@test typeof(@inferred(StatsBase.binvolume(h3, (1,1)))) == Float32
@test typeof(@inferred(StatsBase.binvolume(Float64, h3, (1,1)))) == Float64
@test typeof(@inferred(Statistics.binvolume(h2, (1,1)))) == Float64
@test typeof(@inferred(Statistics.binvolume(h3, (1,1)))) == Float32
@test typeof(@inferred(Statistics.binvolume(Float64, h3, (1,1)))) == Float64
end


Expand Down Expand Up @@ -75,44 +75,44 @@ end

@testset "histrange" begin
# Note: at the moment, histrange must be qualified with the module name
@test @inferred(StatsBase.histrange(Float64[], 0, :left)) == 0.0:1.0:0.0
@test StatsBase.histrange(Float64[1:5;], 1, :left) == 0.0:5.0:10.0
@test StatsBase.histrange(Float64[1:10;], 1, :left) == 0.0:10.0:20.0
@test StatsBase.histrange(1.0, 10.0, 1, :left) == 0.0:10.0:20.0

@test StatsBase.histrange([0.201,0.299], 10, :left) == 0.2:0.01:0.3
@test StatsBase.histrange([0.2,0.299], 10, :left) == 0.2:0.01:0.3
@test StatsBase.histrange([0.2,0.3], 10, :left) == 0.2:0.01:0.31
@test StatsBase.histrange(0.2, 0.3, 10, :left) == 0.2:0.01:0.31
@test StatsBase.histrange([0.2,0.3], 10, :right) == 0.19:0.01:0.3
@test StatsBase.histrange(0.2, 0.3, 10, :right) == 0.19:0.01:0.3

@test StatsBase.histrange([200.1,299.9], 10, :left) == 200.0:10.0:300.0
@test StatsBase.histrange([200.0,299.9], 10, :left) == 200.0:10.0:300.0
@test StatsBase.histrange([200.0,300.0], 10, :left) == 200.0:10.0:310.0
@test StatsBase.histrange([200.0,300.0], 10, :right) == 190.0:10.0:300.0

@test @inferred(StatsBase.histrange(Int64[1:5;], 1, :left)) == 0:5:10
@test StatsBase.histrange(Int64[1:10;], 1, :left) == 0:10:20

@test StatsBase.histrange([0, 1, 2, 3], 4, :left) == 0.0:1.0:4.0
@test StatsBase.histrange([0, 1, 1, 3], 4, :left) == 0.0:1.0:4.0
@test StatsBase.histrange([0, 9], 4, :left) == 0.0:5.0:10.0
@test StatsBase.histrange([0, 19], 4, :left) == 0.0:5.0:20.0
@test StatsBase.histrange([0, 599], 4, :left) == 0.0:200.0:600.0
@test StatsBase.histrange([-1, -1000], 4, :left) == -1000.0:500.0:0.0
@test @inferred(Statistics.histrange(Float64[], 0, :left)) == 0.0:1.0:0.0
@test Statistics.histrange(Float64[1:5;], 1, :left) == 0.0:5.0:10.0
@test Statistics.histrange(Float64[1:10;], 1, :left) == 0.0:10.0:20.0
@test Statistics.histrange(1.0, 10.0, 1, :left) == 0.0:10.0:20.0

@test Statistics.histrange([0.201,0.299], 10, :left) == 0.2:0.01:0.3
@test Statistics.histrange([0.2,0.299], 10, :left) == 0.2:0.01:0.3
@test Statistics.histrange([0.2,0.3], 10, :left) == 0.2:0.01:0.31
@test Statistics.histrange(0.2, 0.3, 10, :left) == 0.2:0.01:0.31
@test Statistics.histrange([0.2,0.3], 10, :right) == 0.19:0.01:0.3
@test Statistics.histrange(0.2, 0.3, 10, :right) == 0.19:0.01:0.3

@test Statistics.histrange([200.1,299.9], 10, :left) == 200.0:10.0:300.0
@test Statistics.histrange([200.0,299.9], 10, :left) == 200.0:10.0:300.0
@test Statistics.histrange([200.0,300.0], 10, :left) == 200.0:10.0:310.0
@test Statistics.histrange([200.0,300.0], 10, :right) == 190.0:10.0:300.0

@test @inferred(Statistics.histrange(Int64[1:5;], 1, :left)) == 0:5:10
@test Statistics.histrange(Int64[1:10;], 1, :left) == 0:10:20

@test Statistics.histrange([0, 1, 2, 3], 4, :left) == 0.0:1.0:4.0
@test Statistics.histrange([0, 1, 1, 3], 4, :left) == 0.0:1.0:4.0
@test Statistics.histrange([0, 9], 4, :left) == 0.0:5.0:10.0
@test Statistics.histrange([0, 19], 4, :left) == 0.0:5.0:20.0
@test Statistics.histrange([0, 599], 4, :left) == 0.0:200.0:600.0
@test Statistics.histrange([-1, -1000], 4, :left) == -1000.0:500.0:0.0

# Base issue #13326
l,h = extrema(StatsBase.histrange([typemin(Int),typemax(Int)], 10, :left))
l,h = extrema(Statistics.histrange([typemin(Int),typemax(Int)], 10, :left))
@test l <= typemin(Int)
@test h >= typemax(Int)

@test_throws ArgumentError StatsBase.histrange([1, 10], 0, :left)
@test_throws ArgumentError StatsBase.histrange([1, 10], -1, :left)
@test_throws ArgumentError StatsBase.histrange([1.0, 10.0], 0, :left)
@test_throws ArgumentError StatsBase.histrange([1.0, 10.0], -1, :left)
@test_throws ArgumentError StatsBase.histrange(Float64[],-1, :left)
@test_throws ArgumentError StatsBase.histrange([0.], 0, :left)
@test_throws ArgumentError Statistics.histrange([1, 10], 0, :left)
@test_throws ArgumentError Statistics.histrange([1, 10], -1, :left)
@test_throws ArgumentError Statistics.histrange([1.0, 10.0], 0, :left)
@test_throws ArgumentError Statistics.histrange([1.0, 10.0], -1, :left)
@test_throws ArgumentError Statistics.histrange(Float64[],-1, :left)
@test_throws ArgumentError Statistics.histrange([0.], 0, :left)
end


Expand Down Expand Up @@ -220,8 +220,8 @@ end
end

@testset "midpoints" begin
@test StatsBase.midpoints([1, 2, 4]) == [1.5, 3.0]
@test StatsBase.midpoints(range(0, stop = 1, length = 5)) == 0.125:0.25:0.875
@test Statistics.midpoints([1, 2, 4]) == [1.5, 3.0]
@test Statistics.midpoints(range(0, stop = 1, length = 5)) == 0.125:0.25:0.875
end

end # @testset "StatsBase.Histogram"
end # @testset "Histogram"
4 changes: 3 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -896,4 +896,6 @@ include("partialcor.jl")
include("signalcorr.jl")
include("robust.jl")
include("ranking.jl")
include("rankcorr.jl")
include("rankcorr.jl")
include("empirical.jl")
include("hist.jl")

0 comments on commit 850d3e6

Please sign in to comment.