From be48c0b4c17240654d180fde380696122c2cc13b Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Fri, 11 Oct 2024 09:54:41 +1300
Subject: [PATCH 01/11] Fix mutable_copy(::BigFloat) in Julia v1.12

---
 src/implementations/BigFloat.jl | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/implementations/BigFloat.jl b/src/implementations/BigFloat.jl
index 442abed2..43e1b401 100644
--- a/src/implementations/BigFloat.jl
+++ b/src/implementations/BigFloat.jl
@@ -9,12 +9,18 @@
 
 mutability(::Type{BigFloat}) = IsMutable()
 
-# Copied from `deepcopy_internal` implementation in Julia:
-# https://github.com/JuliaLang/julia/blob/7d41d1eb610cad490cbaece8887f9bbd2a775021/base/mpfr.jl#L1041-L1050
-function mutable_copy(x::BigFloat)
-    d = x._d
-    d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
-    return Base.MPFR._BigFloat(x.prec, x.sign, x.exp, d′)
+@static if VERSION >= v"1.12"
+    function mutable_copy(x::BigFloat)
+        return Base.MPFR._BigFloat(copy(getfield(x, :d)))
+    end
+else
+    # Copied from `deepcopy_internal` implementation in Julia:
+    # https://github.com/JuliaLang/julia/blob/7d41d1eb610cad490cbaece8887f9bbd2a775021/base/mpfr.jl#L1041-L1050
+    function mutable_copy(x::BigFloat)
+        d = x._d
+        d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
+        return Base.MPFR._BigFloat(x.prec, x.sign, x.exp, d′)
+    end
 end
 
 const _MPFRRoundingMode = Base.MPFR.MPFRRoundingMode

From aa0fa693624ce90a241903e9d9606cb33824667e Mon Sep 17 00:00:00 2001
From: Oscar Dowson <odow@users.noreply.github.com>
Date: Fri, 11 Oct 2024 10:16:07 +1300
Subject: [PATCH 02/11] Update src/implementations/BigFloat.jl

---
 src/implementations/BigFloat.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/implementations/BigFloat.jl b/src/implementations/BigFloat.jl
index 43e1b401..e261e8aa 100644
--- a/src/implementations/BigFloat.jl
+++ b/src/implementations/BigFloat.jl
@@ -9,7 +9,7 @@
 
 mutability(::Type{BigFloat}) = IsMutable()
 
-@static if VERSION >= v"1.12"
+@static if VERSION >= v"1.12.0-DEV.1343"
     function mutable_copy(x::BigFloat)
         return Base.MPFR._BigFloat(copy(getfield(x, :d)))
     end

From 7f2159c8ca616eecb5b0d65ea522bcd3620362cb Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Fri, 11 Oct 2024 10:22:51 +1300
Subject: [PATCH 03/11] Update

---
 src/implementations/BigFloat.jl | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/implementations/BigFloat.jl b/src/implementations/BigFloat.jl
index e261e8aa..6856f47d 100644
--- a/src/implementations/BigFloat.jl
+++ b/src/implementations/BigFloat.jl
@@ -9,17 +9,19 @@
 
 mutability(::Type{BigFloat}) = IsMutable()
 
+# These methods are copied from `deepcopy_internal` in `base/mpfr.jl`. We don't
+# use `mutable_copy(x) = deepcopy(x)` because this creates an empty `IdDict()`
+# which costs some extra allocations. We don't need the IdDict case because we
+# never call `mutable_copy` recursively.
 @static if VERSION >= v"1.12.0-DEV.1343"
-    function mutable_copy(x::BigFloat)
-        return Base.MPFR._BigFloat(copy(getfield(x, :d)))
-    end
+    mutable_copy(x::BigFloat) = Base.MPFR._BigFloat(copy(getfield(x, :d)))
 else
-    # Copied from `deepcopy_internal` implementation in Julia:
-    # https://github.com/JuliaLang/julia/blob/7d41d1eb610cad490cbaece8887f9bbd2a775021/base/mpfr.jl#L1041-L1050
     function mutable_copy(x::BigFloat)
         d = x._d
-        d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
-        return Base.MPFR._BigFloat(x.prec, x.sign, x.exp, d′)
+        GC.@preserve d begin
+            d′ = unsafe_string(pointer(d), sizeof(d))
+            return Base.MPFR._BigFloat(x.prec, x.sign, x.exp, d′)
+        end
     end
 end
 

From b261d01ebc211393c8465788887871d68b784fe3 Mon Sep 17 00:00:00 2001
From: Oscar Dowson <odow@users.noreply.github.com>
Date: Fri, 11 Oct 2024 10:26:10 +1300
Subject: [PATCH 04/11] Update src/implementations/BigFloat.jl

---
 src/implementations/BigFloat.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/implementations/BigFloat.jl b/src/implementations/BigFloat.jl
index 6856f47d..97b9782b 100644
--- a/src/implementations/BigFloat.jl
+++ b/src/implementations/BigFloat.jl
@@ -14,7 +14,8 @@ mutability(::Type{BigFloat}) = IsMutable()
 # which costs some extra allocations. We don't need the IdDict case because we
 # never call `mutable_copy` recursively.
 @static if VERSION >= v"1.12.0-DEV.1343"
-    mutable_copy(x::BigFloat) = Base.MPFR._BigFloat(copy(getfield(x, :d)))
+    # mutable_copy(x::BigFloat) = Base.MPFR._BigFloat(copy(getfield(x, :d)))
+    mutable_copy(x::BigFloat) = deepcopy(x)
 else
     function mutable_copy(x::BigFloat)
         d = x._d

From ec3f42baea8ed04fccd196390733c98d6a176b92 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Fri, 11 Oct 2024 10:57:33 +1300
Subject: [PATCH 05/11] Simplify test/bigfloat_dot.jl

---
 src/implementations/BigFloat.jl |  3 +-
 test/bigfloat_dot.jl            | 80 +++++++--------------------------
 2 files changed, 16 insertions(+), 67 deletions(-)

diff --git a/src/implementations/BigFloat.jl b/src/implementations/BigFloat.jl
index 97b9782b..6856f47d 100644
--- a/src/implementations/BigFloat.jl
+++ b/src/implementations/BigFloat.jl
@@ -14,8 +14,7 @@ mutability(::Type{BigFloat}) = IsMutable()
 # which costs some extra allocations. We don't need the IdDict case because we
 # never call `mutable_copy` recursively.
 @static if VERSION >= v"1.12.0-DEV.1343"
-    # mutable_copy(x::BigFloat) = Base.MPFR._BigFloat(copy(getfield(x, :d)))
-    mutable_copy(x::BigFloat) = deepcopy(x)
+    mutable_copy(x::BigFloat) = Base.MPFR._BigFloat(copy(getfield(x, :d)))
 else
     function mutable_copy(x::BigFloat)
         d = x._d
diff --git a/test/bigfloat_dot.jl b/test/bigfloat_dot.jl
index 37de53a8..295e0e36 100644
--- a/test/bigfloat_dot.jl
+++ b/test/bigfloat_dot.jl
@@ -4,69 +4,18 @@
 # v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
 # one at http://mozilla.org/MPL/2.0/.
 
-backup_bigfloats(v::AbstractVector{BigFloat}) = map(MA.copy_if_mutable, v)
-
-absolute_error(accurate::Real, approximate::Real) = abs(accurate - approximate)
-
-function relative_error(accurate::Real, approximate::Real)
-    return absolute_error(accurate, approximate) / abs(accurate)
-end
-
-function dotter(x::V, y::V) where {V<:AbstractVector{<:Real}}
-    let x = x, y = y
-        () -> LinearAlgebra.dot(x, y)
-    end
-end
-
-function reference_dot(x::V, y::V) where {F<:Real,V<:AbstractVector{F}}
-    return setprecision(dotter(x, y), F, 8 * precision(F))
-end
-
-function dot_test_relative_error(x::V, y::V) where {V<:AbstractVector{BigFloat}}
-    buf = MA.buffer_for(LinearAlgebra.dot, V, V)
-
-    input = (x, y)
-    backup = map(backup_bigfloats, input)
-
-    output = BigFloat()
-
-    MA.buffered_operate_to!!(buf, output, LinearAlgebra.dot, input...)
-
-    @test input == backup
-
-    return relative_error(reference_dot(input...), output)
-end
-
-subtracter(s::Real) =
-    let s = s
-        x -> x - s
-    end
-
-our_rand(n::Int, bias::Real) = map(subtracter(bias), rand(BigFloat, n))
-
 function rand_dot_rel_err(size::Int, bias::Real)
-    x = our_rand(size, bias)
-    y = our_rand(size, bias)
-    return dot_test_relative_error(x, y)
-end
-
-function max_rand_dot_rel_err(size::Int, bias::Real, iter_cnt::Int)
-    max_rel_err = zero(BigFloat)
-    for i in 1:iter_cnt
-        rel_err = rand_dot_rel_err(size, bias)
-        <(max_rel_err, rel_err) && (max_rel_err = rel_err)
-    end
-    return max_rel_err
-end
-
-function max_rand_dot_ulps(size::Int, bias::Real, iter_cnt::Int)
-    return max_rand_dot_rel_err(size, bias, iter_cnt) / eps(BigFloat)
-end
-
-function ulper(size::Int, bias::Real, iter_cnt::Int)
-    let s = size, b = bias, c = iter_cnt
-        () -> max_rand_dot_ulps(s, b, c)
+    x = rand(BigFloat, size) .- bias
+    y = rand(BigFloat, size) .- bias
+    backup = (MA.copy_if_mutable(x), MA.copy_if_mutable(y))
+    buf = MA.buffer_for(LinearAlgebra.dot, Vector{BigFloat}, Vector{BigFloat})
+    output = BigFloat()
+    MA.buffered_operate_to!!(buf, output, LinearAlgebra.dot, x, y)
+    @test (x, y) == backup
+    accurate = setprecision(BigFloat, 8 * precision(BigFloat)) do
+        return LinearAlgebra.dot(x, y)
     end
+    return abs(accurate - output) / abs(accurate)
 end
 
 @testset "prec:$prec size:$size bias:$bias" for (prec, size, bias) in
@@ -78,11 +27,9 @@ end
     # precision (except when vector lengths are really huge with
     # respect to the precision).
     (32, 64),
-
     # Compensated summation should be accurate even for very large
     # input vectors, so test that.
     (10000,),
-
     # The zero "bias" signifies that the input will be entirely
     # nonnegative (drawn from the interval [0, 1]), while a positive
     # bias shifts that interval towards negative infinity. We want to
@@ -91,8 +38,11 @@ end
     # no guarantee on the relative error in that case.
     (0.0, 2^-2, 2^-2 + 2^-3 + 2^-4),
 )
-    iter_cnt = 10
-    err = setprecision(ulper(size, bias, iter_cnt), BigFloat, prec)
+    err = setprecision(BigFloat, prec) do
+        return mapreduce(max, 1:10) do _
+            return rand_dot_rel_err(size, bias) / eps(BigFloat)
+        end
+    end
     @test 0 <= err < 1
 end
 

From 5bb93e19d77b55f67c223c6ccbe6ac78359ac053 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Fri, 11 Oct 2024 11:23:02 +1300
Subject: [PATCH 06/11] Update

---
 test/bigfloat_dot.jl | 43 +++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/test/bigfloat_dot.jl b/test/bigfloat_dot.jl
index 295e0e36..d2fc9ad2 100644
--- a/test/bigfloat_dot.jl
+++ b/test/bigfloat_dot.jl
@@ -4,20 +4,6 @@
 # v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
 # one at http://mozilla.org/MPL/2.0/.
 
-function rand_dot_rel_err(size::Int, bias::Real)
-    x = rand(BigFloat, size) .- bias
-    y = rand(BigFloat, size) .- bias
-    backup = (MA.copy_if_mutable(x), MA.copy_if_mutable(y))
-    buf = MA.buffer_for(LinearAlgebra.dot, Vector{BigFloat}, Vector{BigFloat})
-    output = BigFloat()
-    MA.buffered_operate_to!!(buf, output, LinearAlgebra.dot, x, y)
-    @test (x, y) == backup
-    accurate = setprecision(BigFloat, 8 * precision(BigFloat)) do
-        return LinearAlgebra.dot(x, y)
-    end
-    return abs(accurate - output) / abs(accurate)
-end
-
 @testset "prec:$prec size:$size bias:$bias" for (prec, size, bias) in
                                                 Iterators.product(
     # These precisions (in bits) are most probably smaller than what
@@ -39,9 +25,34 @@ end
     (0.0, 2^-2, 2^-2 + 2^-3 + 2^-4),
 )
     err = setprecision(BigFloat, prec) do
-        return mapreduce(max, 1:10) do _
-            return rand_dot_rel_err(size, bias) / eps(BigFloat)
+        maximum_relative_error = mapreduce(max, 1:10) do _
+            # Generate some random vectors for dot(x, y) input.
+            x = rand(BigFloat, size) .- bias
+            y = rand(BigFloat, size) .- bias
+            # Copy x and y so that we can check we haven't mutated them after
+            # the fact.
+            old_x, old_y = MA.copy_if_mutable(x), MA.copy_if_mutable(y)
+            # Compute output = dot(x, y)
+            buf = MA.buffer_for(
+                LinearAlgebra.dot,
+                Vector{BigFloat},
+                Vector{BigFloat},
+            )
+            output = BigFloat()
+            MA.buffered_operate_to!!(buf, output, LinearAlgebra.dot, x, y)
+            # Check that we haven't mutated x or y
+            @test old_x == x
+            @test old_y == y
+            # Compute dot(x, y) in larger precision. This will be used to
+            # compare with our `dot`.
+            accurate = setprecision(BigFloat, 8 * precision(BigFloat)) do
+                return LinearAlgebra.dot(x, y)
+            end
+            # Compute the relative error
+            return abs(accurate - output) / abs(accurate)
         end
+        # Return estimate for ULP
+        return maximum_relative_error / eps(BigFloat)
     end
     @test 0 <= err < 1
 end

From 18befd9919b33c48b00a18f04216a4cef24cd38e Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Fri, 11 Oct 2024 11:25:19 +1300
Subject: [PATCH 07/11] Update

---
 test/bigfloat_dot.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/bigfloat_dot.jl b/test/bigfloat_dot.jl
index d2fc9ad2..e7836108 100644
--- a/test/bigfloat_dot.jl
+++ b/test/bigfloat_dot.jl
@@ -46,8 +46,8 @@
             # Compute dot(x, y) in larger precision. This will be used to
             # compare with our `dot`.
             accurate = setprecision(BigFloat, 8 * precision(BigFloat)) do
-                return LinearAlgebra.dot(x, y)
-            end
+                    return LinearAlgebra.dot(x, y)
+                end
             # Compute the relative error
             return abs(accurate - output) / abs(accurate)
         end

From 879898a47a43aef32462b9c083fe0e95e6ab11a3 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Sun, 13 Oct 2024 12:16:06 +1300
Subject: [PATCH 08/11] Update

---
 src/implementations/BigFloat.jl | 71 ++++++++++++++-------------------
 1 file changed, 31 insertions(+), 40 deletions(-)

diff --git a/src/implementations/BigFloat.jl b/src/implementations/BigFloat.jl
index 6856f47d..62fff1ba 100644
--- a/src/implementations/BigFloat.jl
+++ b/src/implementations/BigFloat.jl
@@ -310,7 +310,7 @@ struct DotBuffer{F<:Real}
     multiplication_temp::F
     inner_temp::F
 
-    DotBuffer{F}() where {F<:Real} = new{F}(ntuple(i -> F(), Val{4}())...)
+    DotBuffer{F}() where {F<:Real} = new{F}(ntuple(i -> zero(F), Val{4}())...)
 end
 
 function buffer_for(
@@ -394,6 +394,31 @@ end
 #       # end.
 #       sum + c
 #   end
+
+# Returns abs(x) <= abs(y) without allocating.
+function _abs_lte_abs(x::F, y::F) where {F<:BigFloat}
+    x_is_neg = signbit(x)
+    y_is_neg = signbit(y)
+    x_neg = x_is_neg != y_is_neg
+    if x_neg
+        operate!(-, x)
+    end
+    ret = if y_is_neg
+        y <= x
+    else
+        x <= y
+    end
+    if x_neg
+        operate!(-, x)
+    end
+    return ret
+end
+
+function _mpfr_swap(x::BigFloat, y::BigFloat)
+    ccall((:mpfr_swap, :libmpfr), Cvoid, (Ref{BigFloat}, Ref{BigFloat}), x, y)
+    return
+end
+
 function buffered_operate_to!(
     buf::DotBuffer{F},
     sum::F,
@@ -401,60 +426,26 @@ function buffered_operate_to!(
     x::AbstractVector{F},
     y::AbstractVector{F},
 ) where {F<:BigFloat}
-    set! = (o, i) -> operate_to!(o, copy, i)
-
-    local swap! = function (x::BigFloat, y::BigFloat)
-        ccall((:mpfr_swap, :libmpfr), Cvoid, (Ref{BigFloat}, Ref{BigFloat}), x, y)
-        return nothing
-    end
-
-    # Returns abs(x) <= abs(y) without allocating.
-    local abs_lte_abs = function (x::F, y::F)
-        local x_is_neg = signbit(x)
-        local y_is_neg = signbit(y)
-
-        local x_neg = x_is_neg != y_is_neg
-
-        x_neg && operate!(-, x)
-
-        local ret = if y_is_neg
-            y <= x
-        else
-            x <= y
-        end
-
-        x_neg && operate!(-, x)
-
-        return ret
-    end
-
     operate!(zero, sum)
     operate!(zero, buf.compensation)
-
     for i in 0:(length(x)-1)
-        set!(buf.multiplication_temp, x[begin+i])
+        operate_to!(buf.multiplication_temp, copy, x[begin+i])
         operate!(*, buf.multiplication_temp, y[begin+i])
-
         operate!(zero, buf.summation_temp)
         operate_to!(buf.summation_temp, +, buf.multiplication_temp, sum)
-
-        if abs_lte_abs(buf.multiplication_temp, sum)
-            set!(buf.inner_temp, sum)
+        if _abs_lte_abs(buf.multiplication_temp, sum)
+            operate_to!(buf.inner_temp, copy, sum)
             operate!(-, buf.inner_temp, buf.summation_temp)
             operate!(+, buf.inner_temp, buf.multiplication_temp)
         else
-            set!(buf.inner_temp, buf.multiplication_temp)
+            operate_to!(buf.inner_temp, copy, buf.multiplication_temp)
             operate!(-, buf.inner_temp, buf.summation_temp)
             operate!(+, buf.inner_temp, sum)
         end
-
         operate!(+, buf.compensation, buf.inner_temp)
-
-        swap!(sum, buf.summation_temp)
+        _mpfr_swap(sum, buf.summation_temp)
     end
-
     operate!(+, sum, buf.compensation)
-
     return sum
 end
 

From 74b39827dd377c775225943c42c89d4a334316b2 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Sun, 13 Oct 2024 14:09:54 +1300
Subject: [PATCH 09/11] Update

---
 src/implementations/BigFloat.jl | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/implementations/BigFloat.jl b/src/implementations/BigFloat.jl
index 62fff1ba..61a39e07 100644
--- a/src/implementations/BigFloat.jl
+++ b/src/implementations/BigFloat.jl
@@ -414,11 +414,6 @@ function _abs_lte_abs(x::F, y::F) where {F<:BigFloat}
     return ret
 end
 
-function _mpfr_swap(x::BigFloat, y::BigFloat)
-    ccall((:mpfr_swap, :libmpfr), Cvoid, (Ref{BigFloat}, Ref{BigFloat}), x, y)
-    return
-end
-
 function buffered_operate_to!(
     buf::DotBuffer{F},
     sum::F,
@@ -428,6 +423,7 @@ function buffered_operate_to!(
 ) where {F<:BigFloat}
     operate!(zero, sum)
     operate!(zero, buf.compensation)
+    tmp = zero(F)
     for i in 0:(length(x)-1)
         operate_to!(buf.multiplication_temp, copy, x[begin+i])
         operate!(*, buf.multiplication_temp, y[begin+i])
@@ -443,7 +439,9 @@ function buffered_operate_to!(
             operate!(+, buf.inner_temp, sum)
         end
         operate!(+, buf.compensation, buf.inner_temp)
-        _mpfr_swap(sum, buf.summation_temp)
+        operate_to!(tmp, copy, buf.multiplication_temp)
+        operate_to!(buf.multiplication_temp, copy, sum)
+        operate_to!(sum, copy, tmp)
     end
     operate!(+, sum, buf.compensation)
     return sum

From 5254fb5e0617ad5488bf864e31bac3569d616a69 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Sun, 13 Oct 2024 14:23:23 +1300
Subject: [PATCH 10/11] Update

---
 src/implementations/BigFloat.jl | 33 +++++++++------------------------
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/src/implementations/BigFloat.jl b/src/implementations/BigFloat.jl
index 61a39e07..d2230630 100644
--- a/src/implementations/BigFloat.jl
+++ b/src/implementations/BigFloat.jl
@@ -374,41 +374,29 @@ end
 #
 #   function KahanBabushkaNeumaierSum(input)
 #       sum = 0.0
-#
 #       # A running compensation for lost low-order bits.
 #       c = 0.0
-#
 #       for i ∈ eachindex(input)
 #           t = sum + input[i]
-#
 #           if abs(input[i]) ≤ abs(sum)
 #               c += (sum - t) + input[i]
 #           else
 #               c += (input[i] - t) + sum
 #           end
-#
 #           sum = t
 #       end
-#
-#       # The result, with the correction only applied once in the very
-#       # end.
+#       # The result, with the correction only applied once in the very end.
 #       sum + c
 #   end
 
 # Returns abs(x) <= abs(y) without allocating.
 function _abs_lte_abs(x::F, y::F) where {F<:BigFloat}
-    x_is_neg = signbit(x)
-    y_is_neg = signbit(y)
-    x_neg = x_is_neg != y_is_neg
-    if x_neg
+    x_is_neg, y_is_neg = signbit(x), signbit(y)
+    if x_is_neg != y_is_neg
         operate!(-, x)
     end
-    ret = if y_is_neg
-        y <= x
-    else
-        x <= y
-    end
-    if x_neg
+    ret = y_is_neg ? y <= x : x <= y
+    if x_is_neg != y_is_neg
         operate!(-, x)
     end
     return ret
@@ -423,10 +411,9 @@ function buffered_operate_to!(
 ) where {F<:BigFloat}
     operate!(zero, sum)
     operate!(zero, buf.compensation)
-    tmp = zero(F)
-    for i in 0:(length(x)-1)
-        operate_to!(buf.multiplication_temp, copy, x[begin+i])
-        operate!(*, buf.multiplication_temp, y[begin+i])
+    for (xi, yi) in zip(x, y)
+        operate_to!(buf.multiplication_temp, copy, xi)
+        operate!(*, buf.multiplication_temp, yi)
         operate!(zero, buf.summation_temp)
         operate_to!(buf.summation_temp, +, buf.multiplication_temp, sum)
         if _abs_lte_abs(buf.multiplication_temp, sum)
@@ -439,9 +426,7 @@ function buffered_operate_to!(
             operate!(+, buf.inner_temp, sum)
         end
         operate!(+, buf.compensation, buf.inner_temp)
-        operate_to!(tmp, copy, buf.multiplication_temp)
-        operate_to!(buf.multiplication_temp, copy, sum)
-        operate_to!(sum, copy, tmp)
+        operate_to!(sum, copy, buf.summation_temp)
     end
     operate!(+, sum, buf.compensation)
     return sum

From ce60f92c8410280faef6ce93e5a55d474c40946c Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Sun, 13 Oct 2024 15:16:59 +1300
Subject: [PATCH 11/11] update

---
 src/implementations/BigFloat.jl | 60 ++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/implementations/BigFloat.jl b/src/implementations/BigFloat.jl
index d2230630..ba8d414d 100644
--- a/src/implementations/BigFloat.jl
+++ b/src/implementations/BigFloat.jl
@@ -305,12 +305,12 @@ function operate_to!(
 end
 
 struct DotBuffer{F<:Real}
-    compensation::F
-    summation_temp::F
-    multiplication_temp::F
-    inner_temp::F
+    c::F
+    t::F
+    input::F
+    tmp::F
 
-    DotBuffer{F}() where {F<:Real} = new{F}(ntuple(i -> zero(F), Val{4}())...)
+    DotBuffer{F}() where {F<:Real} = new{F}(zero(F), zero(F), zero(F), zero(F))
 end
 
 function buffer_for(
@@ -379,10 +379,11 @@ end
 #       for i ∈ eachindex(input)
 #           t = sum + input[i]
 #           if abs(input[i]) ≤ abs(sum)
-#               c += (sum - t) + input[i]
+#               tmp = (sum - t) + input[i]
 #           else
-#               c += (input[i] - t) + sum
+#               tmp = (input[i] - t) + sum
 #           end
+#           c += tmp
 #           sum = t
 #       end
 #       # The result, with the correction only applied once in the very end.
@@ -390,7 +391,7 @@ end
 #   end
 
 # Returns abs(x) <= abs(y) without allocating.
-function _abs_lte_abs(x::F, y::F) where {F<:BigFloat}
+function _abs_lte_abs(x::BigFloat, y::BigFloat)
     x_is_neg, y_is_neg = signbit(x), signbit(y)
     if x_is_neg != y_is_neg
         operate!(-, x)
@@ -403,32 +404,31 @@ function _abs_lte_abs(x::F, y::F) where {F<:BigFloat}
 end
 
 function buffered_operate_to!(
-    buf::DotBuffer{F},
-    sum::F,
+    buf::DotBuffer{BigFloat},
+    sum::BigFloat,
     ::typeof(LinearAlgebra.dot),
-    x::AbstractVector{F},
-    y::AbstractVector{F},
-) where {F<:BigFloat}
-    operate!(zero, sum)
-    operate!(zero, buf.compensation)
-    for (xi, yi) in zip(x, y)
-        operate_to!(buf.multiplication_temp, copy, xi)
-        operate!(*, buf.multiplication_temp, yi)
-        operate!(zero, buf.summation_temp)
-        operate_to!(buf.summation_temp, +, buf.multiplication_temp, sum)
-        if _abs_lte_abs(buf.multiplication_temp, sum)
-            operate_to!(buf.inner_temp, copy, sum)
-            operate!(-, buf.inner_temp, buf.summation_temp)
-            operate!(+, buf.inner_temp, buf.multiplication_temp)
+    x::AbstractVector{BigFloat},
+    y::AbstractVector{BigFloat},
+)                                                   # See pseudocode description
+    operate!(zero, sum)                             # sum = 0
+    operate!(zero, buf.c)                           # c = 0
+    for (xi, yi) in zip(x, y)                       # for i in eachindex(input)
+        operate_to!(buf.input, copy, xi)            # input = x[i]
+        operate!(*, buf.input, yi)                  # input = x[i] * y[i]
+        operate_to!(buf.t, +, sum, buf.input)       # t = sum + input
+        if _abs_lte_abs(buf.input, sum)             # if |input| < |sum|
+            operate_to!(buf.tmp, copy, sum)         # tmp = sum
+            operate!(-, buf.tmp, buf.t)             # tmp = sum - t
+            operate!(+, buf.tmp, buf.input)         # tmp = (sum - t) + input
         else
-            operate_to!(buf.inner_temp, copy, buf.multiplication_temp)
-            operate!(-, buf.inner_temp, buf.summation_temp)
-            operate!(+, buf.inner_temp, sum)
+            operate_to!(buf.tmp, copy, buf.input)   # tmp = input
+            operate!(-, buf.tmp, buf.t)             # tmp = input - t
+            operate!(+, buf.tmp, sum)               # tmp = (input - t) + sum
         end
-        operate!(+, buf.compensation, buf.inner_temp)
-        operate_to!(sum, copy, buf.summation_temp)
+        operate!(+, buf.c, buf.tmp)                 # c += tmp
+        operate_to!(sum, copy, buf.t)               # sum = t
     end
-    operate!(+, sum, buf.compensation)
+    operate!(+, sum, buf.c)                         # sum += c
     return sum
 end