Skip to content

Commit

Permalink
chore(shadertools): Port fp64 module to UBO (#2262)
Browse files Browse the repository at this point in the history
  • Loading branch information
felixpalmer committed Oct 3, 2024
1 parent 3aa0005 commit 1c120a2
Show file tree
Hide file tree
Showing 7 changed files with 1,193 additions and 3 deletions.
4 changes: 2 additions & 2 deletions modules/shadertools/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export {fp64ify, fp64LowPart, fp64ifyMatrix4} from './modules/math/fp64/fp64-uti
export {random} from './modules/math/random/random';

export {fp32} from './modules/math/fp32/fp32';
// export {fp64, fp64arithmetic} from './modules/math/fp64/fp64';
export {fp64, fp64arithmetic} from './modules/math/fp64/fp64';

// engine shader modules

Expand Down Expand Up @@ -93,7 +93,7 @@ export {pbrMaterial} from './modules/lighting/pbr-material/pbr-material';
// DEPRECATED - v8 legacy shader modules (non-uniform buffer)

// math libraries
export {fp64, fp64arithmetic} from './modules-webgl1/math/fp64/fp64';
// export {fp64, fp64arithmetic} from './modules-webgl1/math/fp64/fp64';

// projection and lighting
export {geometry as geometry1} from './modules-webgl1/geometry/geometry';
Expand Down
174 changes: 174 additions & 0 deletions modules/shadertools/src/modules/math/fp64/fp64-arithmetic-glsl.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// luma.gl
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

export const fp64arithmeticShader = /* glsl */ `\
uniform fp64arithmeticUniforms {
uniform float ONE;
} fp64;
/*
About LUMA_FP64_CODE_ELIMINATION_WORKAROUND
The purpose of this workaround is to prevent shader compilers from
optimizing away necessary arithmetic operations by swapping their sequences
or transform the equation to some 'equivalent' form.
The method is to multiply an artifical variable, ONE, which will be known to
the compiler to be 1 only at runtime. The whole expression is then represented
as a polynomial with respective to ONE. In the coefficients of all terms, only one a
and one b should appear
err = (a + b) * ONE^6 - a * ONE^5 - (a + b) * ONE^4 + a * ONE^3 - b - (a + b) * ONE^2 + a * ONE
*/
// Divide float number to high and low floats to extend fraction bits
vec2 split(float a) {
const float SPLIT = 4097.0;
float t = a * SPLIT;
#if defined(LUMA_FP64_CODE_ELIMINATION_WORKAROUND)
float a_hi = t * fp64.ONE - (t - a);
float a_lo = a * fp64.ONE - a_hi;
#else
float a_hi = t - (t - a);
float a_lo = a - a_hi;
#endif
return vec2(a_hi, a_lo);
}
// Divide float number again when high float uses too many fraction bits
vec2 split2(vec2 a) {
vec2 b = split(a.x);
b.y += a.y;
return b;
}
// Special sum operation when a > b
vec2 quickTwoSum(float a, float b) {
#if defined(LUMA_FP64_CODE_ELIMINATION_WORKAROUND)
float sum = (a + b) * fp64.ONE;
float err = b - (sum - a) * fp64.ONE;
#else
float sum = a + b;
float err = b - (sum - a);
#endif
return vec2(sum, err);
}
// General sum operation
vec2 twoSum(float a, float b) {
float s = (a + b);
#if defined(LUMA_FP64_CODE_ELIMINATION_WORKAROUND)
float v = (s * fp64.ONE - a) * fp64.ONE;
float err = (a - (s - v) * fp64.ONE) * fp64.ONE * fp64.ONE * fp64.ONE + (b - v);
#else
float v = s - a;
float err = (a - (s - v)) + (b - v);
#endif
return vec2(s, err);
}
vec2 twoSub(float a, float b) {
float s = (a - b);
#if defined(LUMA_FP64_CODE_ELIMINATION_WORKAROUND)
float v = (s * fp64.ONE - a) * fp64.ONE;
float err = (a - (s - v) * fp64.ONE) * fp64.ONE * fp64.ONE * fp64.ONE - (b + v);
#else
float v = s - a;
float err = (a - (s - v)) - (b + v);
#endif
return vec2(s, err);
}
vec2 twoSqr(float a) {
float prod = a * a;
vec2 a_fp64 = split(a);
#if defined(LUMA_FP64_CODE_ELIMINATION_WORKAROUND)
float err = ((a_fp64.x * a_fp64.x - prod) * fp64.ONE + 2.0 * a_fp64.x *
a_fp64.y * fp64.ONE * fp64.ONE) + a_fp64.y * a_fp64.y * fp64.ONE * fp64.ONE * fp64.ONE;
#else
float err = ((a_fp64.x * a_fp64.x - prod) + 2.0 * a_fp64.x * a_fp64.y) + a_fp64.y * a_fp64.y;
#endif
return vec2(prod, err);
}
vec2 twoProd(float a, float b) {
float prod = a * b;
vec2 a_fp64 = split(a);
vec2 b_fp64 = split(b);
float err = ((a_fp64.x * b_fp64.x - prod) + a_fp64.x * b_fp64.y +
a_fp64.y * b_fp64.x) + a_fp64.y * b_fp64.y;
return vec2(prod, err);
}
vec2 sum_fp64(vec2 a, vec2 b) {
vec2 s, t;
s = twoSum(a.x, b.x);
t = twoSum(a.y, b.y);
s.y += t.x;
s = quickTwoSum(s.x, s.y);
s.y += t.y;
s = quickTwoSum(s.x, s.y);
return s;
}
vec2 sub_fp64(vec2 a, vec2 b) {
vec2 s, t;
s = twoSub(a.x, b.x);
t = twoSub(a.y, b.y);
s.y += t.x;
s = quickTwoSum(s.x, s.y);
s.y += t.y;
s = quickTwoSum(s.x, s.y);
return s;
}
vec2 mul_fp64(vec2 a, vec2 b) {
vec2 prod = twoProd(a.x, b.x);
// y component is for the error
prod.y += a.x * b.y;
#if defined(LUMA_FP64_HIGH_BITS_OVERFLOW_WORKAROUND)
prod = split2(prod);
#endif
prod = quickTwoSum(prod.x, prod.y);
prod.y += a.y * b.x;
#if defined(LUMA_FP64_HIGH_BITS_OVERFLOW_WORKAROUND)
prod = split2(prod);
#endif
prod = quickTwoSum(prod.x, prod.y);
return prod;
}
vec2 div_fp64(vec2 a, vec2 b) {
float xn = 1.0 / b.x;
#if defined(LUMA_FP64_HIGH_BITS_OVERFLOW_WORKAROUND)
vec2 yn = mul_fp64(a, vec2(xn, 0));
#else
vec2 yn = a * xn;
#endif
float diff = (sub_fp64(a, mul_fp64(b, yn))).x;
vec2 prod = twoProd(xn, diff);
return sum_fp64(yn, prod);
}
vec2 sqrt_fp64(vec2 a) {
if (a.x == 0.0 && a.y == 0.0) return vec2(0.0, 0.0);
if (a.x < 0.0) return vec2(0.0 / 0.0, 0.0 / 0.0);
float x = 1.0 / sqrt(a.x);
float yn = a.x * x;
#if defined(LUMA_FP64_CODE_ELIMINATION_WORKAROUND)
vec2 yn_sqr = twoSqr(yn) * fp64.ONE;
#else
vec2 yn_sqr = twoSqr(yn);
#endif
float diff = sub_fp64(a, yn_sqr).x;
vec2 prod = twoProd(x * 0.5, diff);
#if defined(LUMA_FP64_HIGH_BITS_OVERFLOW_WORKAROUND)
return sum_fp64(split(yn), prod);
#else
return sum_fp64(vec2(yn, 0.0), prod);
#endif
}
`;
Loading

0 comments on commit 1c120a2

Please sign in to comment.