Skip to content

Commit

Permalink
Simplify and improve accuracy for subnormals in hypot() (pythonGH-102785
Browse files Browse the repository at this point in the history
)
  • Loading branch information
rhettinger authored and Fidget-Spinner committed Mar 27, 2023
1 parent f9d4acc commit d451dd0
Showing 1 changed file with 28 additions and 35 deletions.
63 changes: 28 additions & 35 deletions Modules/mathmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2498,7 +2498,7 @@ verified for 1 billion random inputs with n=5. [7]
static inline double
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
{
double x, h, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
DoubleLength pr, sm;
int max_e;
Py_ssize_t i;
Expand All @@ -2513,49 +2513,42 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
return max;
}
frexp(max, &max_e);
if (max_e >= -1023) {
scale = ldexp(1.0, -max_e);
assert(max * scale >= 0.5);
assert(max * scale < 1.0);
if (max_e < -1023) {
/* When max_e < -1023, ldexp(1.0, -max_e) would overflow.
So we first perform lossless scaling from subnormals back to normals,
then recurse back to vector_norm(), and then finally undo the scaling.
*/
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);
vec[i] /= DBL_MIN;
}
return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
}
scale = ldexp(1.0, -max_e);
assert(max * scale >= 0.5);
assert(max * scale < 1.0);
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);

x *= scale;
assert(fabs(x) < 1.0);
x *= scale;
assert(fabs(x) < 1.0);

pr = dl_mul(x, x);
assert(pr.hi <= 1.0);
pr = dl_mul(x, x);
assert(pr.hi <= 1.0);

sm = dl_fast_sum(csum, pr.hi);
csum = sm.hi;
frac1 += pr.lo;
frac2 += sm.lo;
}
h = sqrt(csum - 1.0 + (frac1 + frac2));
pr = dl_mul(-h, h);
sm = dl_fast_sum(csum, pr.hi);
csum = sm.hi;
frac1 += pr.lo;
frac2 += sm.lo;
x = csum - 1.0 + (frac1 + frac2);
return (h + x / (2.0 * h)) / scale;
}
/* When max_e < -1023, ldexp(1.0, -max_e) overflows.
So instead of multiplying by a scale, we just divide by *max*.
*/
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);
x /= max;
x = x*x;
assert(x <= 1.0);
assert(fabs(csum) >= fabs(x));
oldcsum = csum;
csum += x;
frac1 += (oldcsum - csum) + x;
}
return max * sqrt(csum - 1.0 + frac1);
h = sqrt(csum - 1.0 + (frac1 + frac2));
pr = dl_mul(-h, h);
sm = dl_fast_sum(csum, pr.hi);
csum = sm.hi;
frac1 += pr.lo;
frac2 += sm.lo;
x = csum - 1.0 + (frac1 + frac2);
return (h + x / (2.0 * h)) / scale;
}

#define NUM_STACK_ELEMS 16
Expand Down

0 comments on commit d451dd0

Please sign in to comment.