Skip to content

Commit

Permalink
cov.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
nalimilan committed Sep 25, 2021
1 parent 96aba7f commit 71cde77
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 222 deletions.
2 changes: 1 addition & 1 deletion docs/src/scalarstats.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ The package implements functions for computing various statistics over an array
```@docs
mean
mean!
middle
geomean
harmmean
genmean
Expand Down Expand Up @@ -58,6 +57,7 @@ quantile
quantile!
median
median!
middle
```

## Mode and Modes
Expand Down
34 changes: 25 additions & 9 deletions src/Statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ include("moments.jl")
include("scalarstats.jl")
include("cov.jl")
include("partialcor.jl")
include("toeplitzsolvers.jl")
include("signalcorr.jl")

##### mean #####
Expand Down Expand Up @@ -727,8 +728,10 @@ function _getnobs(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int)
return n
end

_vmean(x::AbstractVector, vardim::Int) = mean(x)
_vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)
_vmean(x::AbstractVector, vardim::Int, w::Union{AbstractWeights, Nothing}=nothing) =
mean(x, weights=w)
_vmean(x::AbstractMatrix, vardim::Int, w::Union{AbstractWeights, Nothing}=nothing) =
mean(x, dims=vardim, weights=w)

# core functions

Expand Down Expand Up @@ -771,7 +774,7 @@ end
## which can't be handled by broadcast
covm(x::AbstractVector, xmean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x); corrected=corrected)
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
covm(x::AbstractMatrix, xmean, weights::Nothing=nothing, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, vardim; corrected=corrected)
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
Expand All @@ -788,14 +791,24 @@ is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `fals
cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected=corrected)

"""
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true)
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true[, weights::AbstractWeights])
Compute the covariance matrix of the matrix `X` along the dimension `dims`. If `corrected`
is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is scaled with `n`
if `corrected` is `false` where `n = size(X, dims)`.
If `weights` is provided, the biased covariance matrix (`corrected=false`)
is computed by multiplying `scattermat(X, w)` by
``\\frac{1}{\\sum{w}}`` to normalize. However, the unbiased covariance matrix
(`corrected=true`) is dependent on the type of weights used:
* `AnalyticWeights`: ``\\frac{1}{\\sum w - \\sum {w^2} / \\sum w}``
* `FrequencyWeights`: ``\\frac{1}{\\sum{w} - 1}``
* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)`
* `Weights`: `ArgumentError` (bias correction not supported)
"""
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) =
covm(X, _vmean(X, dims), dims; corrected=corrected)
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true,
weights::Union{AbstractWeights, Nothing}=nothing) =
covm(X, _vmean(X, dims, weights), weights, dims; corrected=corrected)

"""
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true)
Expand Down Expand Up @@ -899,7 +912,8 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =

corm(x::AbstractVector{T}, xmean) where {T} =
T === Missing ? missing : one(float(nonmissingtype(T)))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
corm(x::AbstractMatrix, xmean, weights::Nothing=nothing, vardim::Int=1) =
corzm(x .- xmean, vardim)
function corm(x::AbstractVector, mx, y::AbstractVector, my)
require_one_based_indexing(x, y)
n = length(x)
Expand Down Expand Up @@ -936,11 +950,13 @@ cor(x::AbstractVector{T}) where {T} =
T === Missing ? missing : one(float(nonmissingtype(T)))

"""
cor(X::AbstractMatrix; dims::Int=1)
cor(X::AbstractMatrix; dims::Int=1[, weights::AbstractWeights])
Compute the Pearson correlation matrix of the matrix `X` along the dimension `dims`.
The weighted correlation is computed if `weights` is provided.
"""
cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims)
cor(X::AbstractMatrix; dims::Int=1, weights::Union{AbstractWeights, Nothing}=nothing) =
corm(X, _vmean(X, dims, weights), weights, dims)

"""
cor(x::AbstractVector, y::AbstractVector)
Expand Down
101 changes: 31 additions & 70 deletions src/cov.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,9 @@ _unscaled_covzm(x::DenseMatrix, wv::AbstractWeights, dims::Integer) =
_symmetrize!(unscaled_covzm(x, _scalevars(x, values(wv), dims), dims))

"""
scattermat(X, [wv::AbstractWeights]; mean=nothing, dims=1)
scattermat(X; mean=nothing, dims=1[, weights::AbstractWeights])
Compute the scatter matrix, which is an unnormalized covariance matrix.
A weighting vector `wv` can be specified to weight
the estimate.
# Arguments
* `mean=nothing`: a known mean value. `nothing` indicates that the mean is
Expand All @@ -45,62 +43,33 @@ the estimate.
* `dims=1`: the dimension along which the variables are organized.
When `dims = 1`, the variables are considered columns with observations in rows;
when `dims = 2`, variables are in rows with observations in columns.
"""
function scattermat end


"""
cov(X, w::AbstractWeights, vardim=1; mean=nothing, corrected=false)
Compute the weighted covariance matrix. Similar to `var` and `std` the biased covariance
matrix (`corrected=false`) is computed by multiplying `scattermat(X, w)` by
``\\frac{1}{\\sum{w}}`` to normalize. However, the unbiased covariance matrix
(`corrected=true`) is dependent on the type of weights used:
* `AnalyticWeights`: ``\\frac{1}{\\sum w - \\sum {w^2} / \\sum w}``
* `FrequencyWeights`: ``\\frac{1}{\\sum{w} - 1}``
* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)`
* `Weights`: `ArgumentError` (bias correction not supported)
"""
cov

scattermat(x::DenseMatrix; mean=nothing, dims::Int=1) =
_scattermatm(x, mean, dims)
_scattermatm(x::DenseMatrix, ::Nothing, dims::Int) =
_unscaled_covzm(x .- mean(x, dims=dims), dims)
_scattermatm(x::DenseMatrix, mean, dims::Int=1) =
* `weights`: optional weights for observations.
"""
scattermat(x::DenseMatrix; mean=nothing, dims::Int=1,
weights::Union{AbstractWeights, Nothing}=nothing) =
_scattermatm(x, weights, mean, dims)
_scattermatm(x::DenseMatrix, weights::Nothing, mean::Nothing, dims::Int) =
_unscaled_covzm(x .- Statistics.mean(x, dims=dims), dims)
_scattermatm(x::DenseMatrix, weights::Nothing, mean, dims::Int=1) =
_unscaled_covzm(x .- mean, dims)

scattermat(x::DenseMatrix, wv::AbstractWeights; mean=nothing, dims::Int=1) =
_scattermatm(x, wv, mean, dims)
_scattermatm(x::DenseMatrix, wv::AbstractWeights, ::Nothing, dims::Int) =
_unscaled_covzm(x .- mean(x, wv, dims=dims), wv, dims)
_scattermatm(x::DenseMatrix, wv::AbstractWeights, mean, dims::Int) =
_unscaled_covzm(x .- mean, wv, dims)
_scattermatm(x::DenseMatrix, weights::AbstractWeights, mean::Nothing, dims::Int) =
_unscaled_covzm(x .- Statistics.mean(x, weights=weights, dims=dims), weights, dims)
_scattermatm(x::DenseMatrix, weights::AbstractWeights, mean, dims::Int) =
_unscaled_covzm(x .- mean, weights, dims)

## weighted cov
covm(x::DenseMatrix, mean, w::AbstractWeights, dims::Int=1;
covm(x::DenseMatrix, mean, weights::AbstractWeights, dims::Int=1;
corrected::Bool=true) =
rmul!(scattermat(x, w, mean=mean, dims=dims), varcorrection(w, depcheck(:covm, corrected)))

rmul!(scattermat(x, weights=weights, mean=mean, dims=dims),
varcorrection(weights, corrected))

cov(x::DenseMatrix, w::AbstractWeights, dims::Int=1; corrected::Bool=true) =
covm(x, mean(x, w, dims=dims), w, dims; corrected=depcheck(:cov, corrected))

function corm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int=1)
c = covm(x, mean, w, vardim; corrected=false)
s = stdm(x, w, mean, vardim; corrected=false)
function corm(x::DenseMatrix, mean, weights::AbstractWeights, vardim::Int=1)
c = covm(x, mean, weights, vardim; corrected=false)
s = std(x, mean=mean, weights=weights, dims=vardim, corrected=false)
cov2cor!(c, s)
end

"""
cor(X, w::AbstractWeights, dims=1)
Compute the Pearson correlation matrix of `X` along the dimension
`dims` with a weighting `w` .
"""
cor(x::DenseMatrix, w::AbstractWeights, dims::Int=1) =
corm(x, mean(x, w, dims=dims), w, dims)

"""
cov2cor(C, s)
Expand Down Expand Up @@ -156,7 +125,8 @@ cov(ce::CovarianceEstimator, x::AbstractVector, y::AbstractVector) =
error("cov is not defined for $(typeof(ce)), $(typeof(x)) and $(typeof(y))")

"""
cov(ce::CovarianceEstimator, X::AbstractMatrix, [w::AbstractWeights]; mean=nothing, dims::Int=1)
cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1,
[weights::AbstractWeights])
Compute the covariance matrix of the matrix `X` along dimension `dims`
using estimator `ce`. A weighting vector `w` can be specified.
Expand All @@ -170,18 +140,16 @@ The keyword argument `mean` can be:
* when `dims=2`, an `AbstractVector` of length `N` or an `AbstractMatrix`
of size `(N,1)`.
"""
cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1) =
cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1,
weights::Union{AbstractWeights, Nothing}=nothing) =
error("cov is not defined for $(typeof(ce)) and $(typeof(X))")

cov(ce::CovarianceEstimator, X::AbstractMatrix, w::AbstractWeights; mean=nothing, dims::Int=1) =
error("cov is not defined for $(typeof(ce)), $(typeof(X)) and $(typeof(w))")

"""
SimpleCovariance(;corrected::Bool=false)
Simple covariance estimator. Estimation calls `cov(x; corrected=corrected)`,
`cov(x, y; corrected=corrected)` or `cov(X, w, dims; corrected=corrected)`
where `x`, `y` are vectors, `X` is a matrix and `w` is a weighting vector.
`cov(x, y; corrected=corrected)` or `cov(X, dims=dims, weights=weights, corrected=corrected)`
where `x`, `y` are vectors, `X` is a matrix and `weights` is a weighting vector.
"""
struct SimpleCovariance <: CovarianceEstimator
corrected::Bool
Expand All @@ -194,20 +162,13 @@ cov(sc::SimpleCovariance, x::AbstractVector) =
cov(sc::SimpleCovariance, x::AbstractVector, y::AbstractVector) =
cov(x, y; corrected=sc.corrected)

function cov(sc::SimpleCovariance, X::AbstractMatrix; dims::Int=1, mean=nothing)
dims (1, 2) || throw(ArgumentError("Argument dims can only be 1 or 2 (given: $dims)"))
if mean === nothing
return cov(X; dims=dims, corrected=sc.corrected)
else
return covm(X, mean, dims, corrected=sc.corrected)
end
end

function cov(sc::SimpleCovariance, X::AbstractMatrix, w::AbstractWeights; dims::Int=1, mean=nothing)
function cov(sc::SimpleCovariance, X::AbstractMatrix;
dims::Int=1,
weights::Union{AbstractWeights, Nothing}=nothing,
mean=nothing)
dims (1, 2) || throw(ArgumentError("Argument dims can only be 1 or 2 (given: $dims)"))
if mean === nothing
return cov(X, w, dims, corrected=sc.corrected)
else
return covm(X, mean, w, dims, corrected=sc.corrected)
mean = Statistics.mean(X, dims=dims, weights=weights)
end
return covm(X, mean, weights, dims, corrected=sc.corrected)
end
Loading

0 comments on commit 71cde77

Please sign in to comment.