Skip to content

Commit

Permalink
Upgrade to StatsBase v0.34 (#59)
Browse files Browse the repository at this point in the history
* import numerical array types from NumericalTypeAliases and not StatsBase

* add CompatHelper action

* replaced uses of type aliases with their definitions in the same manner as JuliaStats/StatsBase.jl#840

* Include both StatsBase 0.33 and 0.34 in compat

Co-authored-by: Alex Arslan <ararslan@comcast.net>

---------

Co-authored-by: Alex Arslan <ararslan@comcast.net>
  • Loading branch information
AsafManela and ararslan authored Aug 17, 2023
1 parent 9128e4f commit 14bf622
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 58 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/CompatHelper.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# CompatHelper workflow: opens pull requests that update the [compat] entries
# of Project.toml when new versions of dependencies are released.
name: CompatHelper
on:
  schedule:
    # minute 0, hour 0, every day (GitHub evaluates cron schedules in UTC).
    # Quoted because `*` is a special character in YAML.
    - cron: '0 0 * * *'
  # Also allow the workflow to be started manually.
  workflow_dispatch:
permissions:
  # Write access is requested for repository contents and pull requests.
  contents: write
  pull-requests: write
jobs:
  CompatHelper:
    runs-on: ubuntu-latest
    steps:
      # `which julia` fails when Julia is absent; continue-on-error keeps the
      # job alive so the outcome can be inspected by the next step.
      - name: Check if Julia is already available in the PATH
        id: julia_in_path
        run: which julia
        continue-on-error: true
      - name: Install Julia, but only if it is not already available in the PATH
        uses: julia-actions/setup-julia@v1
        with:
          version: '1'
          arch: ${{ runner.arch }}
        if: steps.julia_in_path.outcome != 'success'
      # The Pkg server is cleared via an empty JULIA_PKG_SERVER before the
      # General registry is added (per the step name, so it is fetched via Git).
      - name: "Add the General registry via Git"
        run: |
          import Pkg
          ENV["JULIA_PKG_SERVER"] = ""
          Pkg.Registry.add("General")
        shell: julia --color=yes {0}
      - name: "Install CompatHelper"
        run: |
          import Pkg
          name = "CompatHelper"
          uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
          version = "3"
          Pkg.add(; name, uuid, version)
        shell: julia --color=yes {0}
      - name: "Run CompatHelper"
        run: |
          import CompatHelper
          CompatHelper.main()
        shell: julia --color=yes {0}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # The DOCUMENTER_KEY secret is reused as CompatHelper's private key;
          # the dedicated-secret alternative is kept below for reference.
          COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
          # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "MLBase"
uuid = "f0e99cf1-93fa-52ec-9ecc-5026115318e0"
version = "0.9.1"
version = "0.9.2"

[deps]
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
Expand All @@ -11,7 +11,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
[compat]
IterTools = "1"
Reexport = "1"
StatsBase = "0.33"
StatsBase = "0.33, 0.34"
julia = "1"

[extras]
Expand Down
1 change: 0 additions & 1 deletion src/MLBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ module MLBase
import Base: length, show, keys, precision, length, getindex
import Base: iterate
import Base.Order: lt, Ordering, ForwardOrdering, ReverseOrdering, Forward, Reverse
import StatsBase: RealVector, IntegerVector, RealMatrix, IntegerMatrix, RealArray
import IterTools: product

export
Expand Down
38 changes: 19 additions & 19 deletions src/classification.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# classify

function classify(x::RealVector, ord::Ordering)
function classify(x::AbstractVector{<:Real}, ord::Ordering)
n = length(x)
v = x[1]
k::Int = 1
Expand All @@ -18,9 +18,9 @@ function classify(x::RealVector, ord::Ordering)
return k
end

classify(x::RealVector) = classify(x, Forward)
classify(x::AbstractVector{<:Real}) = classify(x, Forward)

function classify!(r::IntegerVector, x::RealMatrix, ord::Ordering)
function classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}, ord::Ordering)
m = size(x, 1)
n = size(x, 2)
length(r) == n || throw(DimensionMismatch("Mismatched length of r."))
Expand All @@ -30,15 +30,15 @@ function classify!(r::IntegerVector, x::RealMatrix, ord::Ordering)
return r
end

classify!(r::IntegerVector, x::RealMatrix) = classify!(r, x, Forward)
classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}) = classify!(r, x, Forward)

# - this one throws a deprecation
classify(x::RealMatrix, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, ord)
classify(x::RealMatrix) = classify(x, Forward)
classify(x::AbstractMatrix{<:Real}, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, ord)
classify(x::AbstractMatrix{<:Real}) = classify(x, Forward)

# classify with score(s)

function classify_withscore(x::RealVector, ord::Ordering)
function classify_withscore(x::AbstractVector{<:Real}, ord::Ordering)
n = length(x)
v = x[1]
k::Int = 1
Expand All @@ -52,9 +52,9 @@ function classify_withscore(x::RealVector, ord::Ordering)
return (k, v)
end

classify_withscore(x::RealVector) = classify_withscore(x, Forward)
classify_withscore(x::AbstractVector{<:Real}) = classify_withscore(x, Forward)

function classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix, ord::Ordering)
function classify_withscores!(r::AbstractVector{<:Integer}, s::AbstractVector{<:Real}, x::AbstractMatrix{<:Real}, ord::Ordering)
m = size(x, 1)
n = size(x, 2)
length(r) == n || throw(DimensionMismatch("Mismatched length of r."))
Expand All @@ -66,27 +66,27 @@ function classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix, or
return (r, s)
end

classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix) =
classify_withscores!(r::AbstractVector{<:Integer}, s::AbstractVector{<:Real}, x::AbstractMatrix{<:Real}) =
classify_withscores!(r, s, x, Forward)

function classify_withscores(x::RealMatrix{T}, ord::Ordering) where T<:Real
"""
    classify_withscores(x::AbstractMatrix{T}, ord::Ordering) where {T<:Real}

Allocate an `Int` vector and a `T` vector, each of length `size(x, 2)`, and
pass them to `classify_withscores!(r, s, x, ord)`, returning its result
(the `(r, s)` tuple).

The score vector uses the element type `T` of `x`, which is why `T` is bound
directly in `AbstractMatrix{T}`.
"""
function classify_withscores(x::AbstractMatrix{T}, ord::Ordering) where {T<:Real}
    n = size(x, 2)
    r = Array{Int}(undef, n)   # one entry per column of x
    s = Array{T}(undef, n)     # scores keep the element type of x
    return classify_withscores!(r, s, x, ord)
end

classify_withscores(x::RealMatrix{T}) where {T<:Real} = classify_withscores(x, Forward)
# Convenience method: same as `classify_withscores(x, Forward)`.
# The element type is not needed here, so no type parameter is bound.
classify_withscores(x::AbstractMatrix{<:Real}) = classify_withscores(x, Forward)


# classify with threshold

classify(x::RealVector, t::Real, ord::Ordering) =
classify(x::AbstractVector{<:Real}, t::Real, ord::Ordering) =
((k, v) = classify_withscore(x, ord); ifelse(lt(ord, v, t), 0, k))

classify(x::RealVector, t::Real) = classify(x, t, Forward)
classify(x::AbstractVector{<:Real}, t::Real) = classify(x, t, Forward)

function classify!(r::IntegerVector, x::RealMatrix, t::Real, ord::Ordering)
function classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}, t::Real, ord::Ordering)
m = size(x, 1)
n = size(x, 2)
length(r) == n || throw(DimensionMismatch("Mismatched length of r."))
Expand All @@ -96,10 +96,10 @@ function classify!(r::IntegerVector, x::RealMatrix, t::Real, ord::Ordering)
return r
end

classify!(r::IntegerVector, x::RealMatrix, t::Real) = classify!(r, x, t, Forward)
classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}, t::Real) = classify!(r, x, t, Forward)

classify(x::RealMatrix, t::Real, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, t, ord)
classify(x::RealMatrix, t::Real) = classify(x, t, Forward)
classify(x::AbstractMatrix{<:Real}, t::Real, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, t, ord)
classify(x::AbstractMatrix{<:Real}, t::Real) = classify(x, t, Forward)


## label map
Expand Down Expand Up @@ -154,7 +154,7 @@ labeldecode(lmap::LabelMap{T}, ys::AbstractArray{Int}) where {T} =

## group labels

function groupindices(k::Int, xs::IntegerVector; warning::Bool=true)
function groupindices(k::Int, xs::AbstractVector{<:Integer}; warning::Bool=true)
gs = Array{Vector{Int}}(undef, k)
for i = 1:k
gs[i] = Int[]
Expand Down
66 changes: 33 additions & 33 deletions src/perfeval.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

## correctrate & errorrate

correctrate(gt::IntegerVector, r::IntegerVector) = counteq(gt, r) / length(gt)
errorrate(gt::IntegerVector, r::IntegerVector) = countne(gt, r) / length(gt)
correctrate(gt::AbstractVector{<:Integer}, r::AbstractVector{<:Integer}) = counteq(gt, r) / length(gt)
errorrate(gt::AbstractVector{<:Integer}, r::AbstractVector{<:Integer}) = countne(gt, r) / length(gt)

## confusion matrix

function confusmat(k::Integer, gts::IntegerVector, preds::IntegerVector)
function confusmat(k::Integer, gts::AbstractVector{<:Integer}, preds::AbstractVector{<:Integer})
n = length(gts)
length(preds) == n || throw(DimensionMismatch("Inconsistent lengths."))
R = zeros(Int, k, k)
Expand All @@ -21,7 +21,7 @@ end

## counthits & hitrate

function counthits(gt::IntegerVector, rklst::IntegerMatrix, k::Integer)
function counthits(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, k::Integer)
n = length(gt)
size(rklst, 2) == n || throw(DimensionMismatch("Input dimensions mismatch."))
m = min(size(rklst, 1), Int(k))
Expand All @@ -40,7 +40,7 @@ function counthits(gt::IntegerVector, rklst::IntegerMatrix, k::Integer)
return cnt::Int
end

function counthits(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector)
function counthits(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, ks::AbstractVector{<:Integer})
n = length(gt)
size(rklst, 2) == n || throw(DimensionMismatch("Input dimensions mismatch."))
issorted(ks) || throw(DimensionMismatch("ks must be sorted."))
Expand All @@ -67,10 +67,10 @@ function counthits(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector)
end


hitrate(gt::IntegerVector, rklst::IntegerMatrix, k::Integer) =
hitrate(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, k::Integer) =
(counthits(gt, rklst, k) / length(gt))::Float64

function hitrates(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector)
function hitrates(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, ks::AbstractVector{<:Integer})
n = length(gt)
h = counthits(gt, rklst, ks)
nk = length(ks)
Expand Down Expand Up @@ -124,7 +124,7 @@ f1score(x::ROCNums) = (tp2 = x.tp + x.tp; tp2 / (tp2 + x.fp + x.fn) )
_ispos(x::Bool) = x
_ispos(x::Real) = x > zero(x)

function _roc(gt::IntegerVector, pr)
function _roc(gt::AbstractVector{<:Integer}, pr)
len = length(gt)
length(pr) == len || throw(DimensionMismatch("Inconsistent lengths."))

Expand Down Expand Up @@ -159,14 +159,14 @@ function _roc(gt::IntegerVector, pr)
end

# compute roc numbers based on prediction
roc(gt::IntegerVector, pr::IntegerVector) = _roc(gt, pr)
roc(gt::AbstractVector{<:Integer}, pr::AbstractVector{<:Integer}) = _roc(gt, pr)

##
# BinaryThresPredVec imitates a vector:
#
# v[i] := scores[i] < thres ? 0 : 1
#
struct BinaryThresPredVec{ScoreVec <: RealVector,
struct BinaryThresPredVec{ScoreVec <: AbstractVector{<:Real},
T <: Real,
Ord <: Ordering}
scores::ScoreVec
Expand All @@ -178,19 +178,19 @@ length(v::BinaryThresPredVec) = length(v.scores)
getindex(v::BinaryThresPredVec, i::Integer) = !lt(v.ord, v.scores[i], v.thres)

# compute roc numbers based on scores & threshold
roc(gt::IntegerVector, scores::RealVector, t::Real, ord::Ordering) =
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, t::Real, ord::Ordering) =
_roc(gt, BinaryThresPredVec(scores, t, ord))

roc(gt::IntegerVector, scores::RealVector, thres::Real) =
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, thres::Real) =
roc(gt, scores, thres, Forward)

##
# ThresPredVec imitates a vector:
#
# v[i] := scores[i] < thres ? 0 : preds[i]
#
struct ThresPredVec{PredVec <: IntegerVector,
ScoreVec <: RealVector,
struct ThresPredVec{PredVec <: AbstractVector{<:Integer},
ScoreVec <: AbstractVector{<:Real},
T <: Real,
Ord <: Ordering}

Expand All @@ -201,7 +201,7 @@ struct ThresPredVec{PredVec <: IntegerVector,
end

function ThresPredVec(
preds::PVec, scores::SVec, thres::T, ord::Ord) where {PVec<:IntegerVector,SVec<:RealVector,T<:Real,Ord<:Ordering}
preds::PVec, scores::SVec, thres::T, ord::Ord) where {PVec<:AbstractVector{<:Integer},SVec<:AbstractVector{<:Real},T<:Real,Ord<:Ordering}
n = length(preds)
length(scores) == n || throw(DimensionMismatch("Inconsistent lengths."))
ThresPredVec{PVec,SVec,T,Ord}(preds, scores, thres, ord)
Expand All @@ -211,10 +211,10 @@ length(v::ThresPredVec) = length(v.preds)
getindex(v::ThresPredVec, i::Integer) = ifelse(lt(v.ord, v.scores[i], v.thres), 0, v.preds[i])

# compute roc numbers based on predictions & scores & threshold
roc(gt::IntegerVector, preds::Tuple{PV,SV}, t::Real, ord::Ordering) where {PV<:IntegerVector,SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, t::Real, ord::Ordering) where {PV<:AbstractVector{<:Integer},SV<:AbstractVector{<:Real}} =
_roc(gt, ThresPredVec(preds..., t, ord))

roc(gt::IntegerVector, preds::Tuple{PV,SV}, thres::Real) where {PV<:IntegerVector,SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, thres::Real) where {PV<:AbstractVector{<:Integer},SV<:AbstractVector{<:Real}} =
roc(gt, preds, thres, Forward)


Expand All @@ -226,7 +226,7 @@ roc(gt::IntegerVector, preds::Tuple{PV,SV}, thres::Real) where {PV<:IntegerVecto
# threshold[i] <= x < threshold[i+1] --> i+1
# x >= threshold[n] --> n+1
#
function find_thresbin(x::Real, thresholds::RealVector, ord::Ordering)
function find_thresbin(x::Real, thresholds::AbstractVector{<:Real}, ord::Ordering)
n = length(thresholds)
r = 1
if !lt(ord, x, thresholds[1])
Expand All @@ -244,16 +244,16 @@ function find_thresbin(x::Real, thresholds::RealVector, ord::Ordering)
return r::Int
end

find_thresbin(x::Real, thresholds::RealVector) = find_thresbin(x, thresholds, Forward)
find_thresbin(x::Real, thresholds::AbstractVector{<:Real}) = find_thresbin(x, thresholds, Forward)

lin_thresholds(scores::RealArray, n::Integer, ord::ForwardOrdering) =
"""
    lin_thresholds(scores::AbstractArray{<:Real}, n::Integer, ord::ForwardOrdering)

Return `n` evenly spaced thresholds running from the minimum to the maximum
of `scores`, in ascending order.

`range` is used instead of a hand-computed step so that exactly `n` values
are produced and so that constant `scores` (minimum equal to maximum) yield
`n` identical thresholds instead of a zero-step range error.
"""
lin_thresholds(scores::AbstractArray{<:Real}, n::Integer, ord::ForwardOrdering) =
    ((s0, s1) = extrema(scores); range(s0; stop=s1, length=n))

lin_thresholds(scores::RealArray, n::Integer, ord::ReverseOrdering{ForwardOrdering}) =
"""
    lin_thresholds(scores::AbstractArray{<:Real}, n::Integer, ord::ReverseOrdering{ForwardOrdering})

Return `n` evenly spaced thresholds running from the maximum down to the
minimum of `scores`, i.e. sorted with respect to the reverse ordering.

`range` is used instead of a hand-computed step so that exactly `n` values
are produced and so that constant `scores` (minimum equal to maximum) yield
`n` identical thresholds instead of a zero-step range error.
"""
lin_thresholds(scores::AbstractArray{<:Real}, n::Integer, ord::ReverseOrdering{ForwardOrdering}) =
    ((s0, s1) = extrema(scores); range(s1; stop=s0, length=n))

# roc for binary predictions
function roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector, ord::Ordering)
function roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, thresholds::AbstractVector{<:Real}, ord::Ordering)
issorted(thresholds, ord) || error("thresholds must be sorted w.r.t. the given ordering.")

ns = length(scores)
Expand Down Expand Up @@ -291,19 +291,19 @@ function roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector, ord:
return r
end

roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector) = roc(gt, scores, thresholds, Forward)
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, thresholds::AbstractVector{<:Real}) = roc(gt, scores, thresholds, Forward)

roc(gt::IntegerVector, scores::RealVector, n::Integer, ord::Ordering) =
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, n::Integer, ord::Ordering) =
roc(gt, scores, lin_thresholds(scores, n, ord), ord)

roc(gt::IntegerVector, scores::RealVector, n::Integer) = roc(gt, scores, n, Forward)
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, n::Integer) = roc(gt, scores, n, Forward)

roc(gt::IntegerVector, scores::RealVector, ord::Ordering) = roc(gt, scores, 100, ord)
roc(gt::IntegerVector, scores::RealVector) = roc(gt, scores, Forward)
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, ord::Ordering) = roc(gt, scores, 100, ord)
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}) = roc(gt, scores, Forward)

# roc for multi-way predictions
function roc(
gt::IntegerVector, preds::Tuple{PV,SV}, thresholds::RealVector, ord::Ordering) where {PV<:IntegerVector,SV<:RealVector}
gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, thresholds::AbstractVector{<:Real}, ord::Ordering) where {PV<:AbstractVector{<:Integer},SV<:AbstractVector{<:Real}}

issorted(thresholds, ord) || error("thresholds must be sorted w.r.t. the given ordering.")
pr::PV = preds[1]
Expand Down Expand Up @@ -354,17 +354,17 @@ function roc(
return r
end

roc(gt::IntegerVector, preds::Tuple{PV,SV}, thresholds::RealVector) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, thresholds::AbstractVector{<:Real}) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, thresholds, Forward)

roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, n::Integer, ord::Ordering) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, lin_thresholds(preds[2],n,ord), ord)

roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, n::Integer) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, n, Forward)

roc(gt::IntegerVector, preds::Tuple{PV,SV}, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, ord::Ordering) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, 100, ord)

roc(gt::IntegerVector, preds::Tuple{PV,SV}) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, Forward)
Loading

2 comments on commit 14bf622

@ararslan
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/89850

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.9.2 -m "<description of version>" 14bf6224f7c92675b139fbfa5bbd9c93d98d0e8a
git push origin v0.9.2

Please sign in to comment.