diff --git a/numerics/sin_cos.cpp b/numerics/sin_cos.cpp index 88db80cd70..0f3436d610 100644 --- a/numerics/sin_cos.cpp +++ b/numerics/sin_cos.cpp @@ -35,10 +35,10 @@ static bool OSACA_loop_terminator = false; #define OSACA_FUNCTION_BEGIN(arg) \ double volatile OSACA_input = arg; \ - /* Putting a load of the input from memory in the analysed section makes the \ - * dependency graph clearer, but adds a potentially spurious move to the \ - * loop-carried latency. Remove the `volatile` above to carry the loop \ - * through registers.*/ \ + /* Putting a load of the input from memory in the analysed section makes */ \ + /* the dependency graph clearer, but adds a potentially spurious move to */ \ + /* the loop-carried latency. Remove the `volatile` above to carry the */ \ + /* loop through registers.*/ \ IACA_VC64_START; \ double OSACA_loop_carry = OSACA_input; \ OSACA_loop: \ @@ -56,33 +56,33 @@ static bool OSACA_loop_terminator = false; if constexpr (bool volatile OSACA_computed_condition = (condition); \ [] { UNDER_OSACA_HYPOTHESES(return (condition)); }()) -#define UNDER_OSACA_HYPOTHESES(statement) \ - do { \ - constexpr bool UseHardwareFMA = true; \ - constexpr double θ = 0.1; \ - /* From argument reduction. */ \ - constexpr double n_double = θ * (2 / π); \ - constexpr double reduction_value = θ - n_double * π_over_2_high; \ - constexpr double reduction_error = n_double * π_over_2_low; \ - /* Used to determine whether a better argument reduction is needed. */ \ - constexpr DoublePrecision θ_reduced = \ - QuickTwoDifference(reduction_value, reduction_error); \ - /* Used in Sin to detect the near-0 case. */ \ - constexpr double abs_x = \ - θ_reduced.value > 0 ? θ_reduced.value : -θ_reduced.value; \ - /* Used throughout the top-level functions. */ \ - constexpr std::int64_t quadrant = \ - static_cast(n_double) & 0b11; \ - /* Used in DetectDangerousRounding. */ \ - constexpr double normalized_error = 0; \ - /* Not NaN is the only part that matters; used at the end of the top-level \ - * functions to determine whether to call the slow path. */ \ - constexpr double value = 1; \ - { statement; } \ +#define UNDER_OSACA_HYPOTHESES(statement) \ + do { \ + constexpr bool UseHardwareFMA = true; \ + constexpr double θ = 0.1; \ + /* From argument reduction. */ \ + constexpr double n_double = θ * (2 / π); \ + constexpr double reduction_value = θ - n_double * π_over_2_high; \ + constexpr double reduction_error = n_double * π_over_2_low; \ + /* Used to determine whether a better argument reduction is needed. */ \ + constexpr DoublePrecision θ_reduced = \ + QuickTwoDifference(reduction_value, reduction_error); \ + /* Used in Sin to detect the near-0 case. */ \ + constexpr double abs_x = \ + θ_reduced.value > 0 ? θ_reduced.value : -θ_reduced.value; \ + /* Used throughout the top-level functions. */ \ + constexpr std::int64_t quadrant = \ + static_cast(n_double) & 0b11; \ + /* Used in DetectDangerousRounding. */ \ + constexpr double normalized_error = 0; \ + /* Not NaN is the only part that matters; used at the end of the */ \ + /* top-level functions to determine whether to call the slow path. */ \ + constexpr double value = 1; \ + { statement; } \ } while (false) #else -#define OSACA_IF (condition) if (condition) +#define OSACA_IF_(condition) if (condition) #endif @@ -143,8 +143,8 @@ inline std::int64_t AccurateTableIndex(double const abs_x) { template double FusedMultiplyAdd(double const a, double const b, double const c) { - OSACA_IF ((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) || - (fma_policy == FMAPolicy::Auto && UseHardwareFMA)) { + OSACA_IF_((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) || + (fma_policy == FMAPolicy::Auto && UseHardwareFMA)) { using quantities::_elementary_functions::FusedMultiplyAdd; return FusedMultiplyAdd(a, b, c); } else { @@ -154,8 +154,8 @@ double FusedMultiplyAdd(double const a, double const b, double const c) { template double FusedNegatedMultiplyAdd(double const a, double const b, double const c) { - OSACA_IF ((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) || - (fma_policy == FMAPolicy::Auto && UseHardwareFMA)) { + OSACA_IF_((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) || + (fma_policy == FMAPolicy::Auto && UseHardwareFMA)) { using quantities::_elementary_functions::FusedNegatedMultiplyAdd; return FusedNegatedMultiplyAdd(a, b, c); } else { @@ -178,7 +178,7 @@ inline double DetectDangerousRounding(double const x, double const Δx) { _mm_castsi128_pd(_mm_sub_epi64(error_128i, value_exponent_128i))); // TODO(phl): Error analysis to refine the thresholds. Also, can we avoid // negative numbers? - OSACA_IF (normalized_error < -0x1.ffffp971 && + OSACA_IF_(normalized_error < -0x1.ffffp971 && normalized_error > -0x1.00008p972) { #if _DEBUG LOG(ERROR) << std::setprecision(25) << x << " " << std::hexfloat << value @@ -193,12 +193,12 @@ inline double DetectDangerousRounding(double const x, double const Δx) { inline void Reduce(Argument const θ, DoublePrecision& θ_reduced, std::int64_t& quadrant) { - OSACA_IF (θ < π / 4 && θ > -π / 4) { + OSACA_IF_(θ < π / 4 && θ > -π / 4) { θ_reduced.value = θ; θ_reduced.error = 0; quadrant = 0; return; - } else OSACA_IF (θ <= π_over_2_threshold && θ >= -π_over_2_threshold) { + } else OSACA_IF_(θ <= π_over_2_threshold && θ >= -π_over_2_threshold) { // We are not very sensitive to rounding errors in this expression, because // in the worst case it could cause the reduced angle to jump from the // vicinity of π / 4 to the vicinity of -π / 4 with appropriate adjustment @@ -211,7 +211,7 @@ inline void Reduce(Argument const θ, Argument const error = n_double * π_over_2_low; θ_reduced = QuickTwoDifference(value, error); // TODO(phl): Error analysis needed to find the right bounds. - OSACA_IF (θ_reduced.value < -0x1.0p-30 || θ_reduced.value > 0x1.0p-30) { + OSACA_IF_(θ_reduced.value < -0x1.0p-30 || θ_reduced.value > 0x1.0p-30) { quadrant = n & 0b11; return; } @@ -249,7 +249,7 @@ Value SinImplementation(DoublePrecision const θ_reduced) { auto const& x = θ_reduced.value; auto const& e = θ_reduced.error; double const abs_x = std::abs(x); - OSACA_IF (abs_x < sin_near_zero_cutoff) { + OSACA_IF_(abs_x < sin_near_zero_cutoff) { double const x² = x * x; double const x³ = x² * x; double const x³_term = FusedMultiplyAdd( @@ -328,8 +328,8 @@ Value __cdecl Sin(Argument const θ) { std::int64_t quadrant; Reduce(θ, θ_reduced, quadrant); double value; - OSACA_IF (UseHardwareFMA) { - OSACA_IF (quadrant & 0b1) { + OSACA_IF_(UseHardwareFMA) { + OSACA_IF_(quadrant & 0b1) { value = CosImplementation(θ_reduced); } else { #if PRINCIPIA_USE_OSACA_SIN @@ -341,15 +341,15 @@ Value __cdecl Sin(Argument const θ) { #endif } } else { - OSACA_IF (quadrant & 0b1) { + OSACA_IF_(quadrant & 0b1) { value = CosImplementation(θ_reduced); } else { value = SinImplementation(θ_reduced); } } - OSACA_IF (value != value) { + OSACA_IF_(value != value) { OSACA_RETURN_SIN(cr_sin(θ)); - } else OSACA_IF(quadrant & 0b10) { + } else OSACA_IF_(quadrant & 0b10) { OSACA_RETURN_SIN(-value); } else { OSACA_RETURN_SIN(value); @@ -365,22 +365,22 @@ Value __cdecl Cos(Argument θ) { std::int64_t quadrant; Reduce(θ, θ_reduced, quadrant); double value; - OSACA_IF (UseHardwareFMA) { - OSACA_IF (quadrant & 0b1) { + OSACA_IF_(UseHardwareFMA) { + OSACA_IF_(quadrant & 0b1) { value = SinImplementation(θ_reduced); } else { value = CosImplementation(θ_reduced); } } else { - OSACA_IF (quadrant & 0b1) { + OSACA_IF_(quadrant & 0b1) { value = SinImplementation(θ_reduced); } else { value = CosImplementation(θ_reduced); } } - OSACA_IF (value != value) { + OSACA_IF_(value != value) { OSACA_RETURN_COS(cr_cos(θ)); - } else OSACA_IF (quadrant == 1 || quadrant == 2) { + } else OSACA_IF_(quadrant == 1 || quadrant == 2) { OSACA_RETURN_COS(-value); } else { OSACA_RETURN_COS(value);