From a7fb967495ab68dff11562277e01ad3e7f0f855b Mon Sep 17 00:00:00 2001
From: Seth Axen <seth@sethaxen.com>
Date: Mon, 22 May 2023 22:56:03 +0200
Subject: [PATCH] Add OrderStatistic and JointOrderStatistics distributions
 (#1668)

* Add OrderStatistic distribution

* Add JointOrderStatistics distribution

* Simplify implementation of logpdf

* Bug fix

* Add complementary methods

* Avoid code duplication

* Repair cquantile

* Add OrderStatistic tests

* Fix cquantile and add comments

* Actually repair cquantile function

* Add more tests

* Remove suffix

* Remove scrU

* Use correction for multiple tests

* Simplify code

* Use correct distribution name

* Use larger sample size

* Ensure return type is constant

* Swap i and j

* Ensure eltype is returned

* Add JointOrderStatistics tests

* Add reference

* Link between docstrings

* Unify OrderStatistic tests

* More stringently compute number of checks

* Update testset name

* Increase number of draws

* Set seed in testset

* Add an order statistics docs page

* Rename rank variables to `rank` and `ranks`

* Make arg checking more efficient

* Support tuple of ranks

* Apply suggestions from code review

Co-authored-by: David Widmann <devmotion@users.noreply.github.com>

* Use Fill

* Reduce chances of type-instability

* Ensure type-stability and use GammaMTSampler

* Update docstrings

* Apply suggestions from code review

Co-authored-by: David Widmann <devmotion@users.noreply.github.com>

* Add tests that check for empty ranks

* Add comment explaining choice of gamma sampler

* Apply suggestions from code review

Co-authored-by: David Widmann <devmotion@users.noreply.github.com>

* Test for 0 density out of support

* Return -Inf for logpdf if out of support

---------

Co-authored-by: David Widmann <devmotion@users.noreply.github.com>
---
 docs/make.jl                              |   1 +
 docs/src/order_statistics.md              |  16 ++
 src/Distributions.jl                      |   2 +
 src/multivariate/jointorderstatistics.jl  | 168 ++++++++++++++++
 src/multivariates.jl                      |   1 +
 src/univariate/orderstatistic.jl          | 108 ++++++++++
 src/univariates.jl                        |   2 +
 test/multivariate/jointorderstatistics.jl | 232 ++++++++++++++++++++++
 test/runtests.jl                          |   2 +
 test/univariate/orderstatistic.jl         | 232 ++++++++++++++++++++++
 10 files changed, 764 insertions(+)
 create mode 100644 docs/src/order_statistics.md
 create mode 100644 src/multivariate/jointorderstatistics.jl
 create mode 100644 src/univariate/orderstatistic.jl
 create mode 100644 test/multivariate/jointorderstatistics.jl
 create mode 100644 test/univariate/orderstatistic.jl

diff --git a/docs/make.jl b/docs/make.jl
index 39e51d4db6..f95b3f60b3 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -17,6 +17,7 @@ makedocs(
         "reshape.md",
         "cholesky.md",
         "mixture.md",
+        "order_statistics.md",
         "convolution.md",
         "fit.md",
         "extends.md",
diff --git a/docs/src/order_statistics.md b/docs/src/order_statistics.md
new file mode 100644
index 0000000000..cd2bad58bd
--- /dev/null
+++ b/docs/src/order_statistics.md
@@ -0,0 +1,16 @@
+# Order Statistics
+
+The $i$th [Order Statistic](https://en.wikipedia.org/wiki/Order_statistic) of a random sample of size $n$ from a univeriate distribution is the $i$th element after sorting in increasing order.
+As a special case, the first and $n$th order statistics are the minimum and maximum of the sample, while for odd $n$, the $\lceil \frac{n}{2} \rceil$th entry is the sample median.
+
+Given any univariate distribution and the sample size $n$, we can construct the distribution of its $i$th order statistic:
+
+```@docs
+OrderStatistic
+```
+
+If we are interested in more than one order statistic, for continuous univariate distributions we can also construct the joint distribution of order statistics:
+
+```@docs
+JointOrderStatistics
+```
diff --git a/src/Distributions.jl b/src/Distributions.jl
index 489f6435ac..1e2d580c55 100644
--- a/src/Distributions.jl
+++ b/src/Distributions.jl
@@ -108,6 +108,7 @@ export
     IsoNormal,
     IsoNormalCanon,
     JohnsonSU,
+    JointOrderStatistics,
     Kolmogorov,
     KSDist,
     KSOneSided,
@@ -144,6 +145,7 @@ export
     Normal,
     NormalCanon,
     NormalInverseGaussian,
+    OrderStatistic,
     Pareto,
     PGeneralizedGaussian,
     SkewedExponentialPower,
diff --git a/src/multivariate/jointorderstatistics.jl b/src/multivariate/jointorderstatistics.jl
new file mode 100644
index 0000000000..71c3f93bf3
--- /dev/null
+++ b/src/multivariate/jointorderstatistics.jl
@@ -0,0 +1,168 @@
+# Implementation based on chapters 2-4 of
+# Arnold, Barry C., Narayanaswamy Balakrishnan, and Haikady Navada Nagaraja.
+# A first course in order statistics. Society for Industrial and Applied Mathematics, 2008.
+
+"""
+    JointOrderStatistics <: ContinuousMultivariateDistribution
+
+The joint distribution of a subset of order statistics from a sample from a continuous
+univariate distribution.
+
+    JointOrderStatistics(
+        dist::ContinuousUnivariateDistribution,
+        n::Int,
+        ranks=Base.OneTo(n);
+        check_args::Bool=true,
+    )
+
+Construct the joint distribution of order statistics for the specified `ranks` from an IID
+sample of size `n` from `dist`.
+
+The ``i``th order statistic of a sample is the ``i``th element of the sorted sample.
+For example, the 1st order statistic is the sample minimum, while the ``n``th order
+statistic is the sample maximum.
+
+`ranks` must be a sorted vector or tuple of unique `Int`s between 1 and `n`.
+
+For a single order statistic, use [`OrderStatistic`](@ref) instead.
+
+## Examples
+
+```julia
+JointOrderStatistics(Normal(), 10)           # Product(fill(Normal(), 10)) restricted to ordered vectors
+JointOrderStatistics(Cauchy(), 10, 2:9)      # joint distribution of all but the extrema
+JointOrderStatistics(Cauchy(), 10, (1, 10))  # joint distribution of only the extrema
+```
+"""
+struct JointOrderStatistics{
+    D<:ContinuousUnivariateDistribution,R<:Union{AbstractVector{Int},Tuple{Int,Vararg{Int}}}
+} <: ContinuousMultivariateDistribution
+    dist::D
+    n::Int
+    ranks::R
+    function JointOrderStatistics(
+        dist::ContinuousUnivariateDistribution,
+        n::Int,
+        ranks::Union{AbstractVector{Int},Tuple{Int,Vararg{Int}}}=Base.OneTo(n);
+        check_args::Bool=true,
+    )
+        @check_args(
+            JointOrderStatistics,
+            (n, n ≥ 1, "`n` must be a positive integer."),
+            (
+                ranks,
+                _are_ranks_valid(ranks, n),
+                "`ranks` must be a sorted vector or tuple of unique integers between 1 and `n`.",
+            ),
+        )
+        return new{typeof(dist),typeof(ranks)}(dist, n, ranks)
+    end
+end
+
+_islesseq(x, y) = isless(x, y) || isequal(x, y)
+
+function _are_ranks_valid(ranks, n)
+    # this is equivalent to but faster than
+    # issorted(ranks) && allunique(ranks)
+    !isempty(ranks) && first(ranks) ≥ 1 && last(ranks) ≤ n && issorted(ranks; lt=_islesseq)
+end
+function _are_ranks_valid(ranks::AbstractRange, n)
+    !isempty(ranks) && first(ranks) ≥ 1 && last(ranks) ≤ n && step(ranks) > 0
+end
+
+length(d::JointOrderStatistics) = length(d.ranks)
+function insupport(d::JointOrderStatistics, x::AbstractVector)
+    length(d) == length(x) || return false
+    xi, state = iterate(x) # at least one element!
+    dist = d.dist
+    insupport(dist, xi) || return false
+    while (xj_state = iterate(x, state)) !== nothing
+        xj, state = xj_state
+        xj ≥ xi && insupport(dist, xj) || return false
+        xi = xj
+    end
+    return true
+end
+minimum(d::JointOrderStatistics) = Fill(minimum(d.dist), length(d))
+maximum(d::JointOrderStatistics) = Fill(maximum(d.dist), length(d))
+
+params(d::JointOrderStatistics) = tuple(params(d.dist)..., d.n, d.ranks)
+partype(d::JointOrderStatistics) = partype(d.dist)
+Base.eltype(::Type{<:JointOrderStatistics{D}}) where {D} = Base.eltype(D)
+Base.eltype(d::JointOrderStatistics) = eltype(d.dist)
+
+function logpdf(d::JointOrderStatistics, x::AbstractVector{<:Real})
+    n = d.n
+    ranks = d.ranks
+    lp = loglikelihood(d.dist, x)
+    T = typeof(lp)
+    lp += loggamma(T(n + 1))
+    if length(ranks) == n
+        issorted(x) && return lp
+        return oftype(lp, -Inf)
+    end
+    i = first(ranks)
+    xᵢ = first(x)
+    if i > 1  # _marginalize_range(d.dist, 0, i, -Inf, xᵢ, T)
+        lp += (i - 1) * logcdf(d.dist, xᵢ) - loggamma(T(i))
+    end
+    for (j, xⱼ) in Iterators.drop(zip(ranks, x), 1)
+        xⱼ < xᵢ && return oftype(lp, -Inf)
+        lp += _marginalize_range(d.dist, i, j, xᵢ, xⱼ, T)
+        i = j
+        xᵢ = xⱼ
+    end
+    if i < n  # _marginalize_range(d.dist, i, n + 1, xᵢ, Inf, T)
+        lp += (n - i) * logccdf(d.dist, xᵢ) - loggamma(T(n - i + 1))
+    end
+    return lp
+end
+
+# given ∏ₖf(xₖ), marginalize all xₖ for i < k < j
+function _marginalize_range(dist, i, j, xᵢ, xⱼ, T)
+    k = j - i - 1
+    k == 0 && return zero(T)
+    return k * T(logdiffcdf(dist, xⱼ, xᵢ)) - loggamma(T(k + 1))
+end
+
+function _rand!(rng::AbstractRNG, d::JointOrderStatistics, x::AbstractVector{<:Real})
+    n = d.n
+    if n == length(d.ranks)  # ranks == 1:n
+        # direct method, slower than inversion method for large `n` and distributions with
+        # fast quantile function or that use inversion sampling
+        rand!(rng, d.dist, x)
+        sort!(x)
+    else
+        # use exponential generation method with inversion, where for gaps in the ranks, we
+        # use the fact that the sum Y of k IID variables xₘ ~ Exp(1) is Y ~ Gamma(k, 1).
+        # Lurie, D., and H. O. Hartley. "Machine-generation of order statistics for Monte
+        # Carlo computations." The American Statistician 26.1 (1972): 26-27.
+        # this is slow if length(d.ranks) is close to n and quantile for d.dist is expensive,
+        # but this branch is probably taken when length(d.ranks) is small or much smaller than n.
+        T = typeof(one(eltype(x)))
+        s = zero(eltype(x))
+        i = 0
+        for (m, j) in zip(eachindex(x), d.ranks)
+            k = j - i
+            if k > 1
+                # specify GammaMTSampler directly to avoid unnecessarily checking the shape
+                # parameter again and because it has been benchmarked to be the fastest for
+                # shape k ≥ 1 and scale 1
+                s += T(rand(rng, GammaMTSampler(Gamma{T}(T(k), T(1)))))
+            else
+                s += randexp(rng, T)
+            end
+            i = j
+            x[m] = s
+        end
+        j = n + 1
+        k = j - i
+        if k > 1
+            s += T(rand(rng, GammaMTSampler(Gamma{T}(T(k), T(1)))))
+        else
+            s += randexp(rng, T)
+        end
+        x .= quantile.(d.dist, x ./ s)
+    end
+    return x
+end
diff --git a/src/multivariates.jl b/src/multivariates.jl
index 477c78ba5a..7a6f926a65 100644
--- a/src/multivariates.jl
+++ b/src/multivariates.jl
@@ -112,6 +112,7 @@ end
 for fname in ["dirichlet.jl",
               "multinomial.jl",
               "dirichletmultinomial.jl",
+              "jointorderstatistics.jl",
               "mvnormal.jl",
               "mvnormalcanon.jl",
               "mvlognormal.jl",
diff --git a/src/univariate/orderstatistic.jl b/src/univariate/orderstatistic.jl
new file mode 100644
index 0000000000..1a7055ef91
--- /dev/null
+++ b/src/univariate/orderstatistic.jl
@@ -0,0 +1,108 @@
+# Implementation based on chapters 2-4 of
+# Arnold, Barry C., Narayanaswamy Balakrishnan, and Haikady Navada Nagaraja.
+# A first course in order statistics. Society for Industrial and Applied Mathematics, 2008.
+
+"""
+    OrderStatistic{D<:UnivariateDistribution,S<:ValueSupport} <: UnivariateDistribution{S}
+
+The distribution of an order statistic from IID samples from a univariate distribution.
+
+    OrderStatistic(dist::UnivariateDistribution, n::Int, rank::Int; check_args::Bool=true)
+
+Construct the distribution of the `rank` ``=i``th order statistic from `n` independent
+samples from `dist`.
+
+The ``i``th order statistic of a sample is the ``i``th element of the sorted sample.
+For example, the 1st order statistic is the sample minimum, while the ``n``th order
+statistic is the sample maximum.
+
+If ``f`` is the probability density (mass) function of `dist` with distribution function
+``F``, then the probability density function ``g`` of the order statistic for continuous
+`dist` is
+```math
+g(x; n, i) = {n \\choose i} [F(x)]^{i-1} [1 - F(x)]^{n-i} f(x),
+```
+and the probability mass function ``g`` of the order statistic for discrete `dist` is
+```math
+g(x; n, i) = \\sum_{k=i}^n {n \\choose k} \\left( [F(x)]^k [1 - F(x)]^{n-k} - [F(x_-)]^k [1 - F(x_-)]^{n-k} \\right),
+```
+where ``x_-`` is the largest element in the support of `dist` less than ``x``.
+
+For the joint distribution of a subset of order statistics, use
+[`JointOrderStatistics`](@ref) instead.
+
+## Examples
+
+```julia
+OrderStatistic(Cauchy(), 10, 1)              # distribution of the sample minimum
+OrderStatistic(DiscreteUniform(10), 10, 10)  # distribution of the sample maximum
+OrderStatistic(Gamma(1, 1), 11, 5)           # distribution of the sample median
+```
+"""
+struct OrderStatistic{D<:UnivariateDistribution,S<:ValueSupport} <:
+       UnivariateDistribution{S}
+    dist::D
+    n::Int
+    rank::Int
+    function OrderStatistic(
+        dist::UnivariateDistribution, n::Int, rank::Int; check_args::Bool=true
+    )
+        @check_args(OrderStatistic, 1 ≤ rank ≤ n)
+        return new{typeof(dist),value_support(typeof(dist))}(dist, n, rank)
+    end
+end
+
+minimum(d::OrderStatistic) = minimum(d.dist)
+maximum(d::OrderStatistic) = maximum(d.dist)
+insupport(d::OrderStatistic, x::Real) = insupport(d.dist, x)
+
+params(d::OrderStatistic) = tuple(params(d.dist)..., d.n, d.rank)
+partype(d::OrderStatistic) = partype(d.dist)
+Base.eltype(::Type{<:OrderStatistic{D}}) where {D} = Base.eltype(D)
+Base.eltype(d::OrderStatistic) = eltype(d.dist)
+
+# distribution of the ith order statistic from an IID uniform distribution, with CDF Uᵢₙ(x)
+function _uniform_orderstatistic(d::OrderStatistic)
+    n = d.n
+    rank = d.rank
+    return Beta{Int}(rank, n - rank + 1)
+end
+
+function logpdf(d::OrderStatistic, x::Real)
+    b = _uniform_orderstatistic(d)
+    p = cdf(d.dist, x)
+    if value_support(typeof(d)) === Continuous
+        return logpdf(b, p) + logpdf(d.dist, x)
+    else
+        return logdiffcdf(b, p, p - pdf(d.dist, x))
+    end
+end
+
+for f in (:logcdf, :logccdf, :cdf, :ccdf)
+    @eval begin
+        function $f(d::OrderStatistic, x::Real)
+            b = _uniform_orderstatistic(d)
+            return $f(b, cdf(d.dist, x))
+        end
+    end
+end
+
+for f in (:quantile, :cquantile)
+    @eval begin
+        function $f(d::OrderStatistic, p::Real)
+            # since cdf is Fᵢₙ(x) = Uᵢₙ(Fₓ(x)), and Uᵢₙ is invertible and increasing, we
+            # have Fₓ(x) = Uᵢₙ⁻¹(Fᵢₙ(x)). then quantile function is
+            # Qᵢₙ(p) = inf{x: p ≤ Fᵢₙ(x)} = inf{x: Uᵢₙ⁻¹(p) ≤ Fₓ(x)} = Qₓ(Uᵢₙ⁻¹(p))
+            b = _uniform_orderstatistic(d)
+            return quantile(d.dist, $f(b, p))
+        end
+    end
+end
+
+function rand(rng::AbstractRNG, d::OrderStatistic)
+    # inverse transform sampling. Since quantile function is Qₓ(Uᵢₙ⁻¹(p)), we draw a random
+    # variable from Uᵢₙ and pass it through the quantile function of `d.dist`
+    T = eltype(d.dist)
+    b = _uniform_orderstatistic(d)
+    return T(quantile(d.dist, rand(rng, b)))
+end
diff --git a/src/univariates.jl b/src/univariates.jl
index 16d1a2e71e..726fd8e429 100644
--- a/src/univariates.jl
+++ b/src/univariates.jl
@@ -731,3 +731,5 @@ end
 for dname in continuous_distributions
     include(joinpath("univariate", "continuous", "$(dname).jl"))
 end
+
+include(joinpath("univariate", "orderstatistic.jl"))
diff --git a/test/multivariate/jointorderstatistics.jl b/test/multivariate/jointorderstatistics.jl
new file mode 100644
index 0000000000..d5d65a752e
--- /dev/null
+++ b/test/multivariate/jointorderstatistics.jl
@@ -0,0 +1,232 @@
+using Distributions, LinearAlgebra, Random, SpecialFunctions, Statistics, Test
+
+@testset "JointOrderStatistics" begin
+    Random.seed!(123)
+
+    @testset "check_args" begin
+        dist = Normal()
+        JointOrderStatistics(dist, 2, 1:2)
+        JointOrderStatistics(dist, 3, 2:3)
+        JointOrderStatistics(dist, 5, [2, 3])
+        @test_throws DomainError JointOrderStatistics(dist, 0, 1:2)
+        @test_throws DomainError JointOrderStatistics(dist, 2, 2:3)
+        @test_throws DomainError JointOrderStatistics(dist, 3, 0:3)
+        @test_throws DomainError JointOrderStatistics(dist, 5, 3:-1:2)
+        @test_throws DomainError JointOrderStatistics(dist, 5, 2:1:1)
+        @test_throws DomainError JointOrderStatistics(dist, 0, [1, 2])
+        @test_throws DomainError JointOrderStatistics(dist, 2, [2, 3])
+        @test_throws DomainError JointOrderStatistics(dist, 3, [0, 1, 2, 3])
+        @test_throws DomainError JointOrderStatistics(dist, 5, Int[])
+        @test_throws DomainError JointOrderStatistics(dist, 5, (3, 2))
+        @test_throws DomainError JointOrderStatistics(dist, 5, (3, 3))
+        JointOrderStatistics(dist, 0, 1:2; check_args=false)
+        JointOrderStatistics(dist, 2, 2:3; check_args=false)
+        JointOrderStatistics(dist, 3, 0:3; check_args=false)
+        JointOrderStatistics(dist, 5, 3:-1:2; check_args=false)
+        JointOrderStatistics(dist, 5, 2:1:1; check_args=false)
+        JointOrderStatistics(dist, 0, [1, 2]; check_args=false)
+        JointOrderStatistics(dist, 2, [2, 3]; check_args=false)
+        JointOrderStatistics(dist, 3, [0, 1, 2, 3]; check_args=false)
+        JointOrderStatistics(dist, 5, Int[]; check_args=false)
+        JointOrderStatistics(dist, 5, (3, 2); check_args=false)
+        JointOrderStatistics(dist, 5, (3, 3); check_args=false)
+    end
+
+    @testset for T in [Float32, Float64],
+        dist in [Uniform(T(2), T(10)), Exponential(T(10)), Normal(T(100), T(10))],
+        n in [16, 40],
+        r in [
+            1:n,
+            ([i, j] for j in 2:n for i in 1:min(10, j - 1))...,
+            vcat(2:4, (n - 10):(n - 5)),
+            (2, n ÷ 2, n - 5),
+        ]
+
+        d = JointOrderStatistics(dist, n, r)
+
+        @testset "basic" begin
+            @test d isa JointOrderStatistics
+            @test d.dist === dist
+            @test d.n === n
+            @test d.ranks === r
+            @test length(d) == length(r)
+            @test params(d) == (params(dist)..., d.n, d.ranks)
+            @test partype(d) === partype(dist)
+            @test eltype(d) === eltype(dist)
+
+            length(r) == n && @test JointOrderStatistics(dist, n) == d
+        end
+
+        @testset "support" begin
+            @test minimum(d) == fill(minimum(dist), length(r))
+            @test maximum(d) == fill(maximum(dist), length(r))
+            x = sort(rand(dist, length(r)))
+            x2 = sort(rand(dist, length(r) + 1))
+            @test insupport(d, x)
+            if length(x) > 1
+                @test !insupport(d, reverse(x))
+                @test !insupport(d, x[1:(end - 1)])
+            end
+            @test !insupport(d, x2)
+            @test !insupport(d, fill(NaN, length(x)))
+        end
+
+        @testset "pdf/logpdf" begin
+            x = convert(Vector{T}, sort(rand(dist, length(r))))
+            @test @inferred(logpdf(d, x)) isa T
+            @test @inferred(pdf(d, x)) isa T
+
+            if length(r) == 1
+                @test logpdf(d, x) ≈ logpdf(OrderStatistic(dist, n, r[1]), x[1])
+                @test pdf(d, x) ≈ pdf(OrderStatistic(dist, n, r[1]), x[1])
+            elseif length(r) == 2
+                i, j = r
+                xi, xj = x
+                lc = T(
+                    logfactorial(n) - logfactorial(i - 1) - logfactorial(n - j) -
+                    logfactorial(j - i - 1),
+                )
+                lp = (
+                    lc +
+                    (i - 1) * logcdf(dist, xi) +
+                    (n - j) * logccdf(dist, xj) +
+                    (j - i - 1) * logdiffcdf(dist, xj, xi) +
+                    logpdf(dist, xi) +
+                    logpdf(dist, xj)
+                )
+                @test logpdf(d, x) ≈ lp
+                @test pdf(d, x) ≈ exp(lp)
+            elseif collect(r) == 1:n
+                @test logpdf(d, x) ≈ sum(Base.Fix1(logpdf, d.dist), x) + loggamma(T(n + 1))
+                @test pdf(d, x) ≈ exp(logpdf(d, x))
+            end
+
+            @testset "no density for vectors out of support" begin
+                # check unsorted vectors have 0 density
+                x2 = copy(x)
+                x2[1], x2[2] = x2[2], x2[1]
+                @test logpdf(d, x2) == T(-Inf)
+                @test pdf(d, x2) == zero(T)
+
+                x3 = copy(x)
+                x3[end-1], x3[end] = x3[end], x3[end-1]
+                @test logpdf(d, x3) == T(-Inf)
+                @test pdf(d, x3) == zero(T)
+
+                # check out of support of original distribution
+                if islowerbounded(dist)
+                    x4 = copy(x)
+                    x4[1] = minimum(dist) - 1
+                    @test logpdf(d, x4) == T(-Inf)
+                    @test pdf(d, x4) == zero(T)
+                end
+            end
+        end
+    end
+
+    @testset "rand" begin
+        @testset for T in [Float32, Float64]
+            dist = Uniform(T(-2), T(1))
+            d = JointOrderStatistics(dist, 10, 1:10)
+            S = typeof(rand(dist))
+
+            v = rand(d)
+            @test v isa Vector{S}
+            @test insupport(d, v)
+            @test size(v) == (10,)
+
+            rng = Random.default_rng()
+            Random.seed!(rng, 42)
+            x = @inferred(rand(rng, d, 20))
+            @test x isa Matrix{S}
+            @test size(x) == (10, 20)
+            @test all(xi -> insupport(d, xi), eachcol(x))
+
+            Random.seed!(rng, 42)
+            x2 = rand(rng, d, 20)
+            @test x2 == x
+        end
+
+        ndraws = 300_000
+        dists = [Uniform(), Exponential()]
+
+        @testset "marginal mean and standard deviation" begin
+            n = 20
+            rs = [1:n, [1, n], vcat(1:7, 12:17)]
+            @testset for dist in dists, r in rs
+                d = JointOrderStatistics(dist, n, r)
+                x = rand(d, ndraws)
+                @test all(xi -> insupport(d, xi), eachcol(x))
+
+                m = mean(x; dims=2)
+                v = var(x; mean=m, dims=2)
+                if dist isa Uniform
+                    # Arnold (2008). A first course in order statistics. eq 2.2.20-21
+                    m_exact = r ./ (n + 1)
+                    v_exact = @. (m_exact * (1 - m_exact) / (n + 2))
+                elseif dist isa Exponential
+                    # Arnold (2008). A first course in order statistics. eq 4.6.6-7
+                    m_exact = [sum(k -> inv(n - k + 1), 1:i) for i in r]
+                    v_exact = [sum(k -> inv((n - k + 1)^2), 1:i) for i in r]
+                end
+                # compute asymptotic sample standard deviation
+                mean_std = @. sqrt(v_exact / ndraws)
+                m4 = dropdims(mapslices(xi -> moment(xi, 4), x; dims=2); dims=2)
+                var_std = @. sqrt((m4 - v_exact^2) / ndraws)
+
+                nchecks = length(r)
+                α = (0.01 / nchecks) / 2  # multiple correction
+                tol = quantile(Normal(), 1 - α)
+                for k in eachindex(m, m_exact, v, v_exact, mean_std, var_std)
+                    @test m[k] ≈ m_exact[k] atol = (tol * mean_std[k])
+                    @test v[k] ≈ v_exact[k] atol = (tol * var_std[k])
+                end
+            end
+        end
+
+        @testset "pairwise correlations" begin
+            n = 100
+            rs = [    # good mixture of r values with gaps and no gaps
+                1:n,
+                vcat(1:10, (div(n, 2) - 5):(div(n, 2) + 5), (n - 9):n),
+                vcat(10:20, (n - 19):(n - 10)),
+                (1, n),
+            ]
+
+            nchecks = length(dists) * sum(rs) do r
+                m = length(r)
+                return div(m * (m - 1), 2)
+            end
+            α = (0.01 / nchecks) / 2  # multiple correction
+            tol = quantile(Normal(), 1 - α) / sqrt(ndraws)
+
+            @testset for dist in dists, r in rs
+                d = JointOrderStatistics(dist, n, r)
+                x = rand(d, ndraws)
+                @test all(xi -> insupport(d, xi), eachcol(x))
+
+                m = length(r)
+
+                xcor = cor(x; dims=2)
+                if dist isa Uniform
+                    # Arnold (2008). A first course in order statistics. Eq 2.3.16
+                    s = @. n - r + 1
+                    xcor_exact = Symmetric(sqrt.((r .* collect(s)') ./ (collect(r)' .* s)))
+                elseif dist isa Exponential
+                    # Arnold (2008).  A first course in order statistics. Eq 4.6.8
+                    v = [sum(k -> inv((n - k + 1)^2), 1:i) for i in r]
+                    xcor_exact = Symmetric(sqrt.(v ./ v'))
+                end
+                for ii in 1:m, ji in (ii + 1):m
+                    i = r[ii]
+                    j = r[ji]
+                    ρ = xcor[ii, ji]
+                    ρ_exact = xcor_exact[ii, ji]
+                    # use variance-stabilizing transformation, recommended in §3.6 of
+                    # Van der Vaart, A. W. (2000). Asymptotic statistics (Vol. 3).
+                    @test atanh(ρ) ≈ atanh(ρ_exact) atol = tol
+                end
+            end
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 9c8f5b3a4b..cb724278b4 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -88,6 +88,8 @@ const tests = [
     "univariate/continuous/skewedexponentialpower",
     "univariate/discrete/discreteuniform",
     "univariate/continuous/tdist",
+    "univariate/orderstatistic",
+    "multivariate/jointorderstatistics",
     "multivariate/product",
     "eachvariate",
     "univariate/continuous/triangular",
diff --git a/test/univariate/orderstatistic.jl b/test/univariate/orderstatistic.jl
new file mode 100644
index 0000000000..5cd65cb856
--- /dev/null
+++ b/test/univariate/orderstatistic.jl
@@ -0,0 +1,232 @@
+using Test, Distributions
+using Random
+using StatsBase
+
+@testset "OrderStatistic" begin
+    @testset "basic" begin
+        for dist in [Uniform(), Normal(), DiscreteUniform(10)], n in [1, 2, 10], i in 1:n
+            d = OrderStatistic(dist, n, i)
+            @test d isa OrderStatistic
+            if dist isa DiscreteUnivariateDistribution
+                @test d isa DiscreteUnivariateDistribution
+            else
+                @test d isa ContinuousUnivariateDistribution
+            end
+            @test d.dist === dist
+            @test d.n == n
+            @test d.rank == i
+        end
+        @test_throws ArgumentError OrderStatistic(Normal(), 0, 1)
+        OrderStatistic(Normal(), 0, 1; check_args=false)
+        @test_throws ArgumentError OrderStatistic(Normal(), 10, 11)
+        OrderStatistic(Normal(), 10, 11; check_args=false)
+        @test_throws ArgumentError OrderStatistic(Normal(), 10, 0)
+        OrderStatistic(Normal(), 10, 0; check_args=false)
+    end
+
+    @testset "params" begin
+        for dist in [Uniform(), Normal(), DiscreteUniform(10)], n in [1, 2, 10], i in 1:n
+            d = OrderStatistic(dist, n, i)
+            @test params(d) == (params(dist)..., n, i)
+            @test partype(d) === partype(dist)
+        end
+    end
+
+    @testset "support" begin
+        n = 10
+        for i in 1:10
+            d1 = OrderStatistic(Uniform(), n, i)
+            @test minimum(d1) == 0
+            @test maximum(d1) == 1
+            @test insupport(d1, 0)
+            @test insupport(d1, 0.5)
+            @test insupport(d1, 1)
+            @test !insupport(d1, -eps())
+            @test !insupport(d1, 1 + eps())
+            @test !hasfinitesupport(d1)
+            @test islowerbounded(d1)
+            @test isupperbounded(d1)
+
+            d2 = OrderStatistic(Normal(), n, i)
+            @test minimum(d2) == -Inf
+            @test maximum(d2) == Inf
+            @test insupport(d2, -Inf)
+            @test insupport(d2, 0)
+            @test insupport(d2, Inf)
+            @test !hasfinitesupport(d2)
+            @test !islowerbounded(d2)
+            @test !isupperbounded(d2)
+
+            d3 = OrderStatistic(DiscreteUniform(1, 10), n, i)
+            @test minimum(d3) == 1
+            @test maximum(d3) == 10
+            @test insupport(d3, 1)
+            @test insupport(d3, 5)
+            @test insupport(d3, 10)
+            @test !insupport(d3, 0)
+            @test !insupport(d3, 11)
+            @test hasfinitesupport(d3)
+            @test islowerbounded(d3)
+            @test isupperbounded(d3)
+        end
+    end
+
+    @testset "pdf/logpdf" begin
+        @testset "continuous" begin
+            # test against the exact formula computed using BigFloats
+            @testset for T in (Float32, Float64)
+                @testset for dist in
+                             [Uniform(T(-2), T(1)), Normal(T(3), T(2)), Exponential(T(10))],
+                    n in [1, 10, 100],
+                    i in 1:n
+
+                    d = OrderStatistic(dist, n, i)
+                    c = factorial(big(n)) / factorial(big(i - 1)) / factorial(big(n - i))
+                    # since density is concentrated around the i/n quantile, sample a point
+                    # nearby it
+                    x = quantile(dist, clamp(i / n + (rand() - 1//2) / 10, 0, 1))
+                    p = cdf(dist, big(x))
+                    pdf_exp = c * p^(i - 1) * (1 - p)^(n - i) * pdf(dist, big(x))
+                    @test @inferred(T, pdf(d, x)) ≈ T(pdf_exp)
+                    @test @inferred(T, logpdf(d, x)) ≈ T(log(pdf_exp))
+                end
+            end
+        end
+        @testset "discrete" begin
+            # test check that the pdf is the difference of the CDF at adjacent points
+            @testset for dist in
+                         [DiscreteUniform(10, 30), Poisson(100.0), Binomial(20, 0.3)],
+                n in [1, 10, 100],
+                i in 1:n
+
+                d = OrderStatistic(dist, n, i)
+                xs = quantile(dist, 0.01):quantile(dist, 0.99)
+                for x in xs
+                    p = @inferred pdf(d, x)
+                    lp = @inferred logpdf(d, x)
+                    @test lp ≈ logdiffcdf(d, x, x - 1)
+                    @test p ≈ exp(lp)
+                end
+            end
+        end
+    end
+
+    @testset "distribution normalizes to 1" begin
+        @testset for dist in [
+                Uniform(-2, 1),
+                Normal(2, 3),
+                Exponential(5),
+                DiscreteUniform(10, 40),
+                Poisson(100),
+            ],
+            n in [1, 10, 20],
+            i in 1:n
+
+            d = OrderStatistic(dist, n, i)
+            Distributions.expectation(one, d) ≈ 1
+        end
+    end
+
+    @testset "cdf/logcdf/ccdf/logccdf/quantile/cquantile" begin
+        # test against the exact formula computed using BigFloats
+        @testset for T in (Float32, Float64)
+            dists = [
+                (Uniform(T(-2), T(1)), Uniform(big(-2), big(1))),
+                (Normal(T(3), T(2)), Normal(big(3), big(2))),
+                (Exponential(T(10)), Exponential(big(10))),
+                (DiscreteUniform(1, 10), DiscreteUniform(1, 10)),
+                (Poisson(T(20)), Poisson(big(20))),
+            ]
+            @testset for (dist, bigdist) in dists, n in [10, 100], i in 1:n
+                dist isa DiscreteDistribution && T !== Float64 && continue
+                d = OrderStatistic(dist, n, i)
+                # since density is concentrated around the i/n quantile, sample a point
+                # nearby it
+                x = quantile(dist, clamp(i / n + (rand() - 1//2) / 10, 1e-4, 1 - 1e-4))
+                p = cdf(bigdist, big(x))
+                cdf_exp = sum(i:n) do j
+                    c = binomial(big(n), big(j))
+                    return c * p^j * (1 - p)^(n - j)
+                end
+                @test @inferred(T, cdf(d, x)) ≈ T(cdf_exp)
+                @test cdf(d, maximum(d)) ≈ one(T)
+                @test cdf(d, minimum(d) - 1) ≈ zero(T)
+                @test @inferred(T, logcdf(d, x)) ≈ T(log(cdf_exp))
+                @test logcdf(d, maximum(d)) ≈ zero(T)
+                @test logcdf(d, minimum(d) - 1) ≈ -Inf
+                @test @inferred(T, ccdf(d, x)) ≈ T(1 - cdf_exp)
+                @test ccdf(d, maximum(d)) ≈ zero(T)
+                @test ccdf(d, minimum(d) - 1) ≈ one(T)
+                @test @inferred(T, logccdf(d, x)) ≈ T(log(1 - cdf_exp))
+                @test logccdf(d, maximum(d)) ≈ -Inf
+                @test logccdf(d, minimum(d) - 1) ≈ zero(T)
+                q = cdf(d, x)
+                if dist isa DiscreteDistribution
+                    # for discrete distributions, tiny numerical error can cause the wrong
+                    # integer value to be returned.
+                    q -= sqrt(eps(T))
+                end
+                xq = @inferred(T, quantile(d, q))
+                xqc = @inferred(T, cquantile(d, 1 - q))
+                @test xq ≈ xqc
+                @test isapprox(xq, T(x); atol=1e-4) ||
+                    (dist isa DiscreteDistribution && xq < x)
+            end
+        end
+    end
+
+    @testset "rand" begin
+        @testset for T in [Float32, Float64],
+            dist in [Uniform(T(-2), T(1)), Normal(T(1), T(2))]
+
+            d = OrderStatistic(dist, 10, 5)
+            rng = Random.default_rng()
+            Random.seed!(rng, 42)
+            x = @inferred(rand(rng, d))
+            xs = @inferred(rand(rng, d, 10))
+            S = eltype(rand(dist))
+            @test typeof(x) === S
+            @test eltype(xs) === S
+            @test length(xs) == 10
+
+            Random.seed!(rng, 42)
+            x2 = rand(rng, d)
+            xs2 = rand(rng, d, 10)
+            @test x2 == x
+            @test xs2 == xs
+        end
+
+        ndraws = 100_000
+        nchecks = 4 * 2 * 111  # NOTE: update if the below number of tests changes
+        α = (0.01 / nchecks) / 2  # multiple correction
+        tol = quantile(Normal(), 1 - α)
+
+        @testset for dist in [Uniform(), Exponential(), Poisson(20), Binomial(20, 0.3)]
+            @testset for n in [1, 10, 100], i in 1:n
+                d = OrderStatistic(dist, n, i)
+                x = rand(d, ndraws)
+                m, v = mean_and_var(x)
+                if dist isa Uniform
+                    # Arnold (2008). A first course in order statistics. Eqs 2.2.20-21
+                    m_exact = i / (n + 1)
+                    v_exact = m_exact * (1 - m_exact) / (n + 2)
+                elseif dist isa Exponential
+                    # Arnold (2008). A first course in order statistics. Eqs 4.6.6-7
+                    m_exact = sum(k -> inv(n - k + 1), 1:i)
+                    v_exact = sum(k -> inv((n - k + 1)^2), 1:i)
+                elseif dist isa DiscreteUnivariateDistribution
+                    # estimate mean and variance with explicit sum, Eqs 3.2.6-7 from
+                    # Arnold (2008). A first course in order statistics.
+                    xs = 0:quantile(dist, 0.9999)
+                    m_exact = sum(x -> ccdf(d, x), xs)
+                    v_exact = 2 * sum(x -> x * ccdf(d, x), xs) + m_exact - m_exact^2
+                end
+                # compute asymptotic sample standard deviation
+                mean_std = sqrt(v_exact / ndraws)
+                var_std = sqrt((moment(x, 4) - v_exact^2) / ndraws)
+                @test m ≈ m_exact atol = (tol * mean_std)
+                @test v ≈ v_exact atol = (tol * var_std)
+            end
+        end
+    end
+end