From c596aa7d3d0346d755599feecca0af1a0a1a38bb Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 14 Feb 2021 18:43:40 +0100 Subject: [PATCH 1/7] Improve handling of NaN in ranks and rank correlations Throw an error when NaN is encountered by rank functions, and make `corspearman` return `NaN`, instead of silently sorting them at the end. This is consistent with what `corkendall` and `cor` do. --- src/rankcorr.jl | 103 +++++++++++++++++++++++++++++++++++++++++++---- src/ranking.jl | 6 ++- test/rankcorr.jl | 36 ++++++++++++++++- test/ranking.jl | 8 ++++ 4 files changed, 142 insertions(+), 11 deletions(-) diff --git a/src/rankcorr.jl b/src/rankcorr.jl index 0592530b8..5c275f901 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -17,14 +17,101 @@ Compute Spearman's rank correlation coefficient. If `x` and `y` are vectors, the output is a float, otherwise it's a matrix corresponding to the pairwise correlations of the columns of `x` and `y`. """ -corspearman(x::RealVector, y::RealVector) = cor(tiedrank(x), tiedrank(y)) +function corspearman(x::RealVector, y::RealVector) + n = length(x) + n == length(y) || throw(DimensionMismatch("vectors must have same length")) + (any(isnan, x) || any(isnan, y)) && return NaN + return cor(tiedrank(x), tiedrank(y)) +end + +function corspearman(X::RealMatrix, y::RealVector) + size(X, 1) == length(y) || + throw(DimensionMismatch("X and y have inconsistent dimensions")) + n = size(X, 2) + C = Matrix{Float64}(I, n, 1) + any(isnan, y) && return fill!(C, NaN) + yrank = tiedrank(y) + for j = 1:n + Xj = view(X, :, j) + if any(isnan, Xj) + C[j,1] = NaN + else + Xjrank = tiedrank(Xj) + C[j,1] = cor(Xjrank, yrank) + end + end + return C +end + +function corspearman(x::RealVector, Y::RealMatrix) + size(Y, 1) == length(x) || + throw(DimensionMismatch("x and Y have inconsistent dimensions")) + n = size(Y, 2) + C = Matrix{Float64}(I, 1, n) + any(isnan, x) && return fill!(C, NaN) + xrank = tiedrank(x) + for j = 1:n + Yj = view(Y, :, j) + if any(isnan, Yj) + C[1,j] = NaN + else + Yjrank = tiedrank(Yj) + C[1,j] = cor(xrank, Yjrank) + end + end + return C +end -corspearman(X::RealMatrix, Y::RealMatrix) = - cor(mapslices(tiedrank, X, dims=1), mapslices(tiedrank, Y, dims=1)) -corspearman(X::RealMatrix, y::RealVector) = cor(mapslices(tiedrank, X, dims=1), tiedrank(y)) -corspearman(x::RealVector, Y::RealMatrix) = cor(tiedrank(x), mapslices(tiedrank, Y, dims=1)) +function corspearman(X::RealMatrix) + n = size(X, 2) + C = Matrix{Float64}(I, n, n) + for j = 1:n + Xj = view(X, :, j) + if any(isnan, Xj) + C[:,j] .= NaN + C[j,:] .= NaN + C[j,j] = 1 + continue + end + Xjrank = tiedrank(Xj) + for i = 1:(j-1) + Xi = view(X, :, i) + if any(isnan, Xi) + C[i,j] = C[j,i] = NaN + else + Xirank = tiedrank(Xi) + C[i,j] = C[j,i] = cor(Xjrank, Xirank) + end + end + end + return C +end -corspearman(X::RealMatrix) = (Z = mapslices(tiedrank, X, dims=1); cor(Z, Z)) +function corspearman(X::RealMatrix, Y::RealMatrix) + size(X, 1) == size(Y, 1) || + throw(ArgumentError("number of columns in each array must match")) + nr = size(X, 2) + nc = size(Y, 2) + C = Matrix{Float64}(undef, nr, nc) + for j = 1:nr + Xj = view(X, :, j) + if any(isnan, Xj) + C[j,:] .= NaN + continue + end + Xjrank = tiedrank(Xj) + for i = 1:nc + Yi = view(Y, :, i) + if any(isnan, Yi) + C[j,i] = NaN + else + Yirank = tiedrank(Yi) + C[j,i] = cor(Xjrank, Yirank) + end + end + end + return C +end ####################################### @@ -39,12 +126,12 @@ corspearman(X::RealMatrix) = (Z = mapslices(tiedrank, X, dims=1); cor(Z, Z)) function corkendall!(x::RealVector, y::RealVector, permx::AbstractVector{<:Integer}=sortperm(x)) if any(isnan, x) || any(isnan, y) return NaN end n = length(x) - if n != length(y) error("Vectors must have same length") end + n == length(y) || throw(DimensionMismatch("vectors must have same length")) # Initial sorting permute!(x, permx) permute!(y, permx) - + # Use widen to avoid overflows on both 32bit and 64bit npairs = div(widen(n) * (n - 1), 2) ntiesx = ndoubleties = nswaps = widen(0) diff --git a/src/ranking.jl b/src/ranking.jl index 05a5b4657..1ef92cfed 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -13,7 +13,9 @@ function _check_randparams(rks, x, p) end # ranking helper function: calls sortperm(x) and then ranking method f! -function _rank(f!, x::AbstractArray, R::Type=Int; sortkwargs...) +function _rank(f!, x::AbstractArray{T}, R::Type=Int; sortkwargs...) where {T} + any(v -> v isa Number && isnan(v), x) && + throw(ArgumentError("ranks not defined in the presence of NaN")) rks = similar(x, R) ord = reshape(sortperm(vec(x); sortkwargs...), size(x)) return f!(rks, x, ord) @@ -21,6 +23,8 @@ end # ranking helper function for arrays with missing values function _rank(f!, x::AbstractArray{>: Missing}, R::Type=Int; sortkwargs...) + any(v -> v isa Number && isnan(v), x) && + throw(ArgumentError("ranks not defined in the presence of NaN")) inds = findall(!ismissing, vec(x)) isempty(inds) && return missings(R, size(x)) xv = disallowmissing(view(vec(x), inds)) diff --git a/test/rankcorr.jl b/test/rankcorr.jl index 6110d4c41..c23c5f541 100644 --- a/test/rankcorr.jl +++ b/test/rankcorr.jl @@ -2,10 +2,11 @@ using StatsBase using Test X = Float64[1 0; 2 1; 3 0; 4 1; 5 10] +Y = Float64[5 5 6; 3 4 1; 4 0 4; 2 6 1; 5 7 10] x1 = X[:,1] x2 = X[:,2] -y = [5, 3, 4, 2, 5] +y = Y[:,1] # corspearman @@ -23,10 +24,13 @@ c22 = corspearman(x2, x2) @test corspearman(X, X) ≈ [c11 c12; c12 c22] @test corspearman(X) ≈ [c11 c12; c12 c22] +@test corspearman(X, Y) == + [corspearman(X[:,i], Y[:,j]) for i in axes(X, 2), j in axes(Y, 2)] + # corkendall # Check error, handling of NaN, Inf etc -@test_throws ErrorException("Vectors must have same length") corkendall([1,2,3,4], [1,2,3]) +@test_throws DimensionMismatch("vectors must have same length") corkendall([1,2,3,4], [1,2,3]) @test isnan(corkendall([1,2], [3,NaN])) @test isnan(corkendall([1,1,1], [1,2,3])) @test corkendall([-Inf,-0.0,Inf],[1,2,3]) == 1.0 @@ -106,3 +110,31 @@ w = repeat(z, n) StatsBase.midpoint(1,10) == 5 StatsBase.midpoint(1,widen(10)) == 5 + + +# NaN handling + +Xnan = copy(X) +Xnan[1,1] = NaN +Ynan = copy(Y) +Ynan[2,1] = NaN + +for f in (corspearman, corkendall) + @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4])) + @test all(isnan, f([1.0, NaN], [1 2; 3 4])) + @test all(isnan, f([1 2; 3 4], [1.0, NaN])) + @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) + @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) + @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) + @test isequal(f(Xnan, Ynan), + [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) +end + + +# Wrong dimensions +for f in (corspearman, corkendall) + @test_throws DimensionMismatch f([1], [1, 2]) + @test_throws DimensionMismatch f([1], [1 2; 3 4]) + @test_throws DimensionMismatch f([1 2; 3 4], [1]) + @test_throws ArgumentError f([1 2; 3 4: 4 6], [1 2; 3 4]) +end \ No newline at end of file diff --git a/test/ranking.jl b/test/ranking.jl index 8745f7396..f5125231b 100644 --- a/test/ranking.jl +++ b/test/ranking.jl @@ -13,6 +13,8 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test ordinalrank(s) == ordinalrank(x) @test ordinalrank(x, rev = true) == ordinalrank(-x) @test ordinalrank(x, lt = (x, y) -> isless(y, x)) == ordinalrank(-x) +@test_throws ArgumentError ordinalrank([2.0, NaN, 1.0]) +@test_throws ArgumentError ordinalrank([2.0, NaN, missing, 1.0]) @test competerank(a) == [1, 2, 2, 4, 5, 5, 5, 8] @test competerank(x) == [4, 1, 2, 5, 5, 2, 8, 5] @@ -21,6 +23,8 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test competerank(s) == competerank(x) @test competerank(x, rev = true) == competerank(-x) @test competerank(x, lt = (x, y) -> isless(y, x)) == competerank(-x) +@test_throws ArgumentError competerank([2.0, NaN, 1.0]) +@test_throws ArgumentError competerank([2.0, NaN, missing, 1.0]) @test denserank(a) == [1, 2, 2, 3, 4, 4, 4, 5] @test denserank(x) == [3, 1, 2, 4, 4, 2, 5, 4] @@ -29,6 +33,8 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test denserank(s) == denserank(x) @test denserank(x, rev = true) == denserank(-x) @test denserank(x, lt = (x, y) -> isless(y, x)) == denserank(-x) +@test_throws ArgumentError denserank([2.0, NaN, 1.0]) +@test_throws ArgumentError denserank([2.0, NaN, missing, 1.0]) @test tiedrank(a) == [1.0, 2.5, 2.5, 4.0, 6.0, 6.0, 6.0, 8.0] @test tiedrank(x) == [4.0, 1.0, 2.5, 6.0, 6.0, 2.5, 8.0, 6.0] @@ -37,3 +43,5 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test tiedrank(s) == tiedrank(x) @test tiedrank(x, rev = true) == tiedrank(-x) @test tiedrank(x, lt = (x, y) -> isless(y, x)) == tiedrank(-x) +@test_throws ArgumentError tiedrank([2.0, NaN, 1.0]) +@test_throws ArgumentError tiedrank([2.0, NaN, missing, 1.0]) \ No newline at end of file From f8116c0285866ba5fe05bce2f09cb9aef1500b0b Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Mon, 15 Feb 2021 10:46:11 +0100 Subject: [PATCH 2/7] More tests, change corkendall dimensions --- src/rankcorr.jl | 3 ++- test/rankcorr.jl | 15 +++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/rankcorr.jl b/src/rankcorr.jl index 5c275f901..6bb8f4d45 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -174,8 +174,9 @@ matrices or vectors. corkendall(x::RealVector, y::RealVector) = corkendall!(copy(x), copy(y)) function corkendall(X::RealMatrix, y::RealVector) + n = size(X, 2) permy = sortperm(y) - return([corkendall!(copy(y), X[:,i], permy) for i in 1:size(X, 2)]) + return(reshape([corkendall!(copy(y), X[:,i], permy) for i in 1:n], n, 1)) end function corkendall(x::RealVector, Y::RealMatrix) diff --git a/test/rankcorr.jl b/test/rankcorr.jl index c23c5f541..4c8eab912 100644 --- a/test/rankcorr.jl +++ b/test/rankcorr.jl @@ -40,7 +40,7 @@ c22 = corspearman(x2, x2) @test corkendall(x1, y) == -1/sqrt(90) @test corkendall(x2, y) == -1/sqrt(72) # RealMatrix, RealVector -@test corkendall(X, y) == [-1/sqrt(90), -1/sqrt(72)] +@test corkendall(X, y) == [-1/sqrt(90) -1/sqrt(72)]' # RealVector, RealMatrix @test corkendall(y, X) == [-1/sqrt(90) -1/sqrt(72)] @@ -94,19 +94,19 @@ z = [1 1 1; @test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1; 0; 1/3] +@test corkendall(z, z[:,1]) == [1 0 1/3]' z = float(z) @test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1; 0; 1/3] +@test corkendall(z, z[:,1]) == [1 0 1/3]' w = repeat(z, n) @test corkendall(w) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(w, w) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(w[:,1], w) == [1 0 1/3] -@test corkendall(w, w[:,1]) == [1; 0; 1/3] +@test corkendall(w, w[:,1]) == [1 0 1/3]' StatsBase.midpoint(1,10) == 5 StatsBase.midpoint(1,widen(10)) == 5 @@ -126,12 +126,19 @@ for f in (corspearman, corkendall) @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) + for k in 1:2 + @test isequal(f(Xnan[:,k], Ynan), + [f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)]) + @test isequal(f(Xnan, Ynan[:,k]), + [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1]) + end @test isequal(f(Xnan, Ynan), [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) end # Wrong dimensions + for f in (corspearman, corkendall) @test_throws DimensionMismatch f([1], [1, 2]) @test_throws DimensionMismatch f([1], [1 2; 3 4]) From 480ef05d89e867b0656d35a8c2d357121cd7885b Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Mon, 15 Feb 2021 11:01:56 +0100 Subject: [PATCH 3/7] Another missing test --- test/rankcorr.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/rankcorr.jl b/test/rankcorr.jl index 4c8eab912..e14ac8074 100644 --- a/test/rankcorr.jl +++ b/test/rankcorr.jl @@ -134,6 +134,9 @@ for f in (corspearman, corkendall) end @test isequal(f(Xnan, Ynan), [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) + @test isequal(f(Xnan), + [i == j ? 1.0 : f(Xnan[:,i], Xnan[:,j]) + for i in axes(Xnan, 2), j in axes(Xnan, 2)]) end From e09cbd2ff1ef63b31a2674c064a72e49f09232eb Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Thu, 25 Feb 2021 19:42:14 +0100 Subject: [PATCH 4/7] Revert changes to ranking functions --- src/ranking.jl | 6 +----- test/ranking.jl | 8 -------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/src/ranking.jl b/src/ranking.jl index 1ef92cfed..05a5b4657 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -13,9 +13,7 @@ function _check_randparams(rks, x, p) end # ranking helper function: calls sortperm(x) and then ranking method f! -function _rank(f!, x::AbstractArray{T}, R::Type=Int; sortkwargs...) where {T} - any(v -> v isa Number && isnan(v), x) && - throw(ArgumentError("ranks not defined in the presence of NaN")) +function _rank(f!, x::AbstractArray, R::Type=Int; sortkwargs...) rks = similar(x, R) ord = reshape(sortperm(vec(x); sortkwargs...), size(x)) return f!(rks, x, ord) @@ -23,8 +21,6 @@ end # ranking helper function for arrays with missing values function _rank(f!, x::AbstractArray{>: Missing}, R::Type=Int; sortkwargs...) - any(v -> v isa Number && isnan(v), x) && - throw(ArgumentError("ranks not defined in the presence of NaN")) inds = findall(!ismissing, vec(x)) isempty(inds) && return missings(R, size(x)) xv = disallowmissing(view(vec(x), inds)) diff --git a/test/ranking.jl b/test/ranking.jl index f5125231b..8745f7396 100644 --- a/test/ranking.jl +++ b/test/ranking.jl @@ -13,8 +13,6 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test ordinalrank(s) == ordinalrank(x) @test ordinalrank(x, rev = true) == ordinalrank(-x) @test ordinalrank(x, lt = (x, y) -> isless(y, x)) == ordinalrank(-x) -@test_throws ArgumentError ordinalrank([2.0, NaN, 1.0]) -@test_throws ArgumentError ordinalrank([2.0, NaN, missing, 1.0]) @test competerank(a) == [1, 2, 2, 4, 5, 5, 5, 8] @test competerank(x) == [4, 1, 2, 5, 5, 2, 8, 5] @@ -23,8 +21,6 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test competerank(s) == competerank(x) @test competerank(x, rev = true) == competerank(-x) @test competerank(x, lt = (x, y) -> isless(y, x)) == competerank(-x) -@test_throws ArgumentError competerank([2.0, NaN, 1.0]) -@test_throws ArgumentError competerank([2.0, NaN, missing, 1.0]) @test denserank(a) == [1, 2, 2, 3, 4, 4, 4, 5] @test denserank(x) == [3, 1, 2, 4, 4, 2, 5, 4] @@ -33,8 +29,6 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test denserank(s) == denserank(x) @test denserank(x, rev = true) == denserank(-x) @test denserank(x, lt = (x, y) -> isless(y, x)) == denserank(-x) -@test_throws ArgumentError denserank([2.0, NaN, 1.0]) -@test_throws ArgumentError denserank([2.0, NaN, missing, 1.0]) @test tiedrank(a) == [1.0, 2.5, 2.5, 4.0, 6.0, 6.0, 6.0, 8.0] @test tiedrank(x) == [4.0, 1.0, 2.5, 6.0, 6.0, 2.5, 8.0, 6.0] @@ -43,5 +37,3 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test tiedrank(s) == tiedrank(x) @test tiedrank(x, rev = true) == tiedrank(-x) @test tiedrank(x, lt = (x, y) -> isless(y, x)) == tiedrank(-x) -@test_throws ArgumentError tiedrank([2.0, NaN, 1.0]) -@test_throws ArgumentError tiedrank([2.0, NaN, missing, 1.0]) \ No newline at end of file From e7f16e3e8848297e72e020814b23cef98b453672 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Thu, 25 Feb 2021 19:51:48 +0100 Subject: [PATCH 5/7] Revert corkendall changes --- src/rankcorr.jl | 5 ++--- test/rankcorr.jl | 45 ++++++++++++++++++++++++++++----------------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/rankcorr.jl b/src/rankcorr.jl index 6bb8f4d45..580046205 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -126,7 +126,7 @@ end function corkendall!(x::RealVector, y::RealVector, permx::AbstractVector{<:Integer}=sortperm(x)) if any(isnan, x) || any(isnan, y) return NaN end n = length(x) - n == length(y) || throw(DimensionMismatch("vectors must have same length")) + if n != length(y) error("Vectors must have same length") end # Initial sorting permute!(x, permx) @@ -174,9 +174,8 @@ matrices or vectors. corkendall(x::RealVector, y::RealVector) = corkendall!(copy(x), copy(y)) function corkendall(X::RealMatrix, y::RealVector) - n = size(X, 2) permy = sortperm(y) - return(reshape([corkendall!(copy(y), X[:,i], permy) for i in 1:n], n, 1)) + return([corkendall!(copy(y), X[:,i], permy) for i in 1:size(X, 2)]) end function corkendall(x::RealVector, Y::RealMatrix) diff --git a/test/rankcorr.jl b/test/rankcorr.jl index e14ac8074..93b64449b 100644 --- a/test/rankcorr.jl +++ b/test/rankcorr.jl @@ -30,7 +30,7 @@ c22 = corspearman(x2, x2) # corkendall # Check error, handling of NaN, Inf etc -@test_throws DimensionMismatch("vectors must have same length") corkendall([1,2,3,4], [1,2,3]) +@test_throws ErrorException("Vectors must have same length") corkendall([1,2,3,4], [1,2,3]) @test isnan(corkendall([1,2], [3,NaN])) @test isnan(corkendall([1,1,1], [1,2,3])) @test corkendall([-Inf,-0.0,Inf],[1,2,3]) == 1.0 @@ -40,7 +40,7 @@ c22 = corspearman(x2, x2) @test corkendall(x1, y) == -1/sqrt(90) @test corkendall(x2, y) == -1/sqrt(72) # RealMatrix, RealVector -@test corkendall(X, y) == [-1/sqrt(90) -1/sqrt(72)]' +@test corkendall(X, y) == [-1/sqrt(90), -1/sqrt(72)] # RealVector, RealMatrix @test corkendall(y, X) == [-1/sqrt(90) -1/sqrt(72)] @@ -94,19 +94,19 @@ z = [1 1 1; @test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1 0 1/3]' +@test corkendall(z, z[:,1]) == [1; 0; 1/3] z = float(z) @test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1 0 1/3]' +@test corkendall(z, z[:,1]) == [1; 0; 1/3] w = repeat(z, n) @test corkendall(w) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(w, w) == [1 0 1/3; 0 1 0; 1/3 0 1] @test corkendall(w[:,1], w) == [1 0 1/3] -@test corkendall(w, w[:,1]) == [1 0 1/3]' +@test corkendall(w, w[:,1]) == [1; 0; 1/3] StatsBase.midpoint(1,10) == 5 StatsBase.midpoint(1,widen(10)) == 5 @@ -126,25 +126,36 @@ for f in (corspearman, corkendall) @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) - for k in 1:2 - @test isequal(f(Xnan[:,k], Ynan), - [f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)]) - @test isequal(f(Xnan, Ynan[:,k]), - [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1]) - end + @test isequal(f(Xnan, Ynan), [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) @test isequal(f(Xnan), [i == j ? 1.0 : f(Xnan[:,i], Xnan[:,j]) for i in axes(Xnan, 2), j in axes(Xnan, 2)]) + for k in 1:2 + @test isequal(f(Xnan[:,k], Ynan), + [f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)]) + # TODO: fix corkendall (PR#659) + if f === corspearman + @test isequal(f(Xnan, Ynan[:,k]), + [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1]) + else + @test isequal(f(Xnan, Ynan[:,k]), + [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2)]) + end + end end # Wrong dimensions -for f in (corspearman, corkendall) - @test_throws DimensionMismatch f([1], [1, 2]) - @test_throws DimensionMismatch f([1], [1 2; 3 4]) - @test_throws DimensionMismatch f([1 2; 3 4], [1]) - @test_throws ArgumentError f([1 2; 3 4: 4 6], [1 2; 3 4]) -end \ No newline at end of file +@test_throws DimensionMismatch corspearman([1], [1, 2]) +@test_throws DimensionMismatch corspearman([1], [1 2; 3 4]) +@test_throws DimensionMismatch corspearman([1 2; 3 4], [1]) +@test_throws ArgumentError corspearman([1 2; 3 4: 4 6], [1 2; 3 4]) + +# TODO: fix corkendall to match corspearman (PR#659) +@test_throws ErrorException corkendall([1], [1, 2]) +@test_throws ErrorException corkendall([1], [1 2; 3 4]) +@test_throws ErrorException corkendall([1 2; 3 4], [1]) +@test_throws ArgumentError corkendall([1 2; 3 4: 4 6], [1 2; 3 4]) \ No newline at end of file From 84cfb9cfae64c949644a384d84ab7a02edcf6c7f Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 28 Feb 2021 19:54:25 +0100 Subject: [PATCH 6/7] Cache any(isnan, Xi) --- src/rankcorr.jl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/rankcorr.jl b/src/rankcorr.jl index 580046205..e696a9a36 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -65,9 +65,13 @@ end function corspearman(X::RealMatrix) n = size(X, 2) C = Matrix{Float64}(I, n, n) + anynan = Vector{Union{Bool, Missing}}(missing, n) for j = 1:n Xj = view(X, :, j) - if any(isnan, Xj) + if ismissing(anynan[j]) + anynan[j] = any(isnan, Xj) + end + if anynan[j] C[:,j] .= NaN C[j,:] .= NaN C[j,j] = 1 @@ -76,7 +80,10 @@ function corspearman(X::RealMatrix) Xjrank = tiedrank(Xj) for i = 1:(j-1) Xi = view(X, :, i) - if any(isnan, Xi) + if ismissing(anynan[i]) + anynan[j] = any(isnan, Xi) + end + if anynan[i] C[i,j] = C[j,i] = NaN else Xirank = tiedrank(Xi) From 2437e07dbc488034a4f80269d6289d75ae3fbff2 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Mon, 1 Mar 2021 21:45:28 +0100 Subject: [PATCH 7/7] Simplify anynan --- src/rankcorr.jl | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/rankcorr.jl b/src/rankcorr.jl index e696a9a36..714548d5d 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -65,12 +65,10 @@ end function corspearman(X::RealMatrix) n = size(X, 2) C = Matrix{Float64}(I, n, n) - anynan = Vector{Union{Bool, Missing}}(missing, n) + anynan = Vector{Bool}(undef, n) for j = 1:n Xj = view(X, :, j) - if ismissing(anynan[j]) - anynan[j] = any(isnan, Xj) - end + anynan[j] = any(isnan, Xj) if anynan[j] C[:,j] .= NaN C[j,:] .= NaN @@ -80,9 +78,6 @@ function corspearman(X::RealMatrix) Xjrank = tiedrank(Xj) for i = 1:(j-1) Xi = view(X, :, i) - if ismissing(anynan[i]) - anynan[j] = any(isnan, Xi) - end if anynan[i] C[i,j] = C[j,i] = NaN else