Speed up RGB <--> XYZ conversions again

kimikage · Oct 11, 2019 · f789000 · f789000
1 parent 1dcc83c
commit f789000
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 18 deletions.
diff --git a/src/conversions.jl b/src/conversions.jl
@@ -74,9 +74,8 @@ correct_gamut(c::CV) where {CV<:AbstractRGB} = CV(clamp01(red(c)), clamp01(green
 clamp01(v::T) where {T<:Fractional} = ifelse(v < zero(T), zero(T), ifelse(v > one(T), one(T), v))
 
 function srgb_compand(v::Fractional)
-    # the following is an optimization technique for `1.055v^(1/2.4) - 0.055`.
-    # x^y ≈ exp(y*log(x)) ≈ exp2(y*log2(y)); the middle form is faster
-    v <= 0.0031308 ? 12.92v : 1.055 * exp(1/2.4 * log(v)) - 0.055
+    # `pow5_12` is an optimized function to get `v^(1/2.4)`
+    v <= 0.0031308 ? 12.92v : 1.055 * pow5_12(v) - 0.055
 end
 
 cnvt(::Type{CV}, c::AbstractRGB) where {CV<:AbstractRGB} = CV(red(c), green(c), blue(c))
@@ -271,17 +270,30 @@ cnvt(::Type{HSI{T}}, c::Color3) where {T} = cnvt(HSI{T}, convert(RGB{T}, c))
 # -----------------
 
 function invert_srgb_compand(v::Fractional)
-    v <= 0.04045 && return v/12.92
-    # the following is an optimization technique for `((v+0.055) /1.055)^2.4`.
-    # see also: srgb_compand(v::Fractional)
-    x = (v + 0.055) / 1.055
-    return x^2 * exp(0.4 * log(x)) # 2.4 == 2 + 0.4
+    # `pow12_5` is an optimized function to get `x^2.4`
+    v <= 0.04045 ? 1/12.92 * v : pow12_5(1/1.055 * (v + 0.055))
 end
 
-const invert_srgb_compand_n0f8 = [invert_srgb_compand(v/255) for v = 0:255] # LUT
+# lookup table for `N0f8` (the extra two elements are for `Float32` splines)
+const invert_srgb_compand_n0f8 = [invert_srgb_compand(v/255.0) for v = 0:257]
 
 function invert_srgb_compand(v::N0f8)
-    invert_srgb_compand_n0f8[reinterpret(UInt8, v) + 1]
+    @inbounds invert_srgb_compand_n0f8[reinterpret(UInt8, v) + 1]
+end
+
+function invert_srgb_compand(v::Float32)
+    i = unsafe_trunc(Int32, v * 255)
+    (i < 13 || i > 255) && return invert_srgb_compand(Float64(v))
+    @inbounds y = view(invert_srgb_compand_n0f8, i:i+3)
+    dv = v * 255.0 - i
+    dv == 0.0 && @inbounds return y[2]
+    if v < 0.38857287f0
+        return @fastmath(y[2]+0.5*dv*((-2/3*y[1]- y[2])+(2y[3]-1/3*y[4])+
+                                  dv*((     y[1]-2y[2])+  y[3]-
+                                  dv*(( 1/3*y[1]- y[2])+( y[3]-1/3*y[4]) ))))
+    else
+        return @fastmath(y[2]+0.5*dv*((4y[3]-3y[2])-y[4]+dv*((y[4]-y[3])+(y[2]-y[3]))))
+    end
 end
 
 function cnvt(::Type{XYZ{T}}, c::AbstractRGB) where T

diff --git a/src/utilities.jl b/src/utilities.jl
@@ -1,6 +1,17 @@
 # Helper data for CIE observer functions
 include("cie_data.jl")
 
+# TODO: move `pow7` from "src/differences.jl" to here
+
+@inline pow3_4(x) = (y = @fastmath(sqrt(x)); y*@fastmath(sqrt(y))) # x^(3/4)
+
+# `pow5_12` is called from `srgb_compand`.
+pow5_12(x) = pow3_4(x) / cbrt(x) # 5/12 == 1/2 + 1/4 - 1/3 == 3/4 - 1/3
+
+# `pow12_5` is called from `invert_srgb_compand`.
+# x^y ≈ exp(y*log(x)) ≈ exp2(y*log2(y)); the middle form is faster
+@noinline pow12_5(x) = x^2 * exp(0.4 * log(x)) # 12/5 == 2.4 == 2 + 0.4
+
 
 # Linear interpolation in [a, b] where x is in [0,1],
 # or coerced to be if not.

diff --git a/test/conversion.jl b/test/conversion.jl
@@ -20,14 +20,7 @@ using ColorTypes: eltype_default, parametric3
     # srgb_compand / invert_srgb_compand
     @test Colors.srgb_compand(0.5) ≈ 0.7353569830524494 atol=eps()
     @test Colors.invert_srgb_compand(0.7353569830524494) ≈ 0.5 atol=eps()
-    # issue #351
-    l_pow_x_y() = for i=1:1000; (i/1000)^(1/2.4) end
-    l_exp_y_log_x() = for i=1:1000; exp(1/2.4*log(i/1000)) end
-    l_pow_x_y(); t_pow_x_y = @elapsed l_pow_x_y()
-    l_exp_y_log_x(); t_exp_y_log_x = @elapsed l_exp_y_log_x()
-    if t_exp_y_log_x > t_pow_x_y
-        @warn "Optimization in `[invert_]srgb_compand()` may have the opposite effect."
-    end
+    @test Colors.invert_srgb_compand(0.735357f0) ≈ 0.5f0 atol=eps(Float32)
 
     fractional_types = (RGB, BGR, RGB1, RGB4)  # types that support Fractional
 

diff --git a/test/utilities.jl b/test/utilities.jl
@@ -1,6 +1,22 @@
 using Colors, FixedPointNumbers, Test, InteractiveUtils
 
 @testset "Utilities" begin
+    # issue #351
+    xs = max.(rand(1000), 1e-4)
+    @noinline l_pow_x_y() = for x in xs; x^2.4 end
+    @noinline l_pow12_5() = for x in xs; Colors.pow12_5(x) end
+    l_pow_x_y(); t_pow_x_y = @elapsed l_pow_x_y()
+    l_pow12_5(); t_pow12_5 = @elapsed l_pow12_5()
+    if t_pow12_5 > t_pow_x_y
+        @warn "Optimization technique in `pow12_5` may have the opposite effect."
+    end
+    @noinline l_exp_y_log_x() = for x in xs; exp(1/2.4 * log(x)) end
+    @noinline l_pow5_12() = for x in xs; Colors.pow5_12(x) end
+    l_exp_y_log_x(); t_exp_y_log_x = @elapsed l_exp_y_log_x()
+    l_pow5_12(); t_pow5_12 = @elapsed l_pow5_12()
+    if t_pow5_12 > t_exp_y_log_x
+        @warn "Optimization technique in `pow5_12` may have the opposite effect."
+    end
 
     @test hex(RGB(1,0.5,0)) == "FF8000"
     @test hex(RGBA(1,0.5,0,0.25)) == "40FF8000"