@luma.gl/shadertools
Version:
Shader module system for luma.gl
212 lines (195 loc) • 6.92 kB
JavaScript
// luma.gl
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors
export const fp64arithmeticWGSL = /* wgsl */ `\
struct Fp64ArithmeticUniforms {
ONE: f32,
SPLIT: f32,
};
@group(0) @binding(auto) var<uniform> fp64arithmetic : Fp64ArithmeticUniforms;
fn fp64_nan(seed: f32) -> f32 {
let nanBits = 0x7fc00000u | select(0u, 1u, seed < 0.0);
return bitcast<f32>(nanBits);
}
fn fp64_runtime_zero() -> f32 {
return fp64arithmetic.ONE * 0.0;
}
fn prevent_fp64_optimization(value: f32) -> f32 {
#ifdef LUMA_FP64_CODE_ELIMINATION_WORKAROUND
return value + fp64_runtime_zero();
#else
return value;
#endif
}
fn split(a: f32) -> vec2f {
let splitValue = prevent_fp64_optimization(fp64arithmetic.SPLIT + fp64_runtime_zero());
let t = prevent_fp64_optimization(a * splitValue);
let temp = prevent_fp64_optimization(t - a);
let aHi = prevent_fp64_optimization(t - temp);
let aLo = prevent_fp64_optimization(a - aHi);
return vec2f(aHi, aLo);
}
fn split2(a: vec2f) -> vec2f {
var b = split(a.x);
b.y = b.y + a.y;
return b;
}
fn quickTwoSum(a: f32, b: f32) -> vec2f {
#ifdef LUMA_FP64_CODE_ELIMINATION_WORKAROUND
let sum = prevent_fp64_optimization((a + b) * fp64arithmetic.ONE);
let err = prevent_fp64_optimization(b - (sum - a) * fp64arithmetic.ONE);
#else
let sum = prevent_fp64_optimization(a + b);
let err = prevent_fp64_optimization(b - (sum - a));
#endif
return vec2f(sum, err);
}
fn twoSum(a: f32, b: f32) -> vec2f {
let s = prevent_fp64_optimization(a + b);
#ifdef LUMA_FP64_CODE_ELIMINATION_WORKAROUND
let v = prevent_fp64_optimization((s * fp64arithmetic.ONE - a) * fp64arithmetic.ONE);
let err =
prevent_fp64_optimization((a - (s - v) * fp64arithmetic.ONE) *
fp64arithmetic.ONE *
fp64arithmetic.ONE *
fp64arithmetic.ONE) +
prevent_fp64_optimization(b - v);
#else
let v = prevent_fp64_optimization(s - a);
let err = prevent_fp64_optimization(a - (s - v)) + prevent_fp64_optimization(b - v);
#endif
return vec2f(s, err);
}
fn twoSub(a: f32, b: f32) -> vec2f {
let s = prevent_fp64_optimization(a - b);
#ifdef LUMA_FP64_CODE_ELIMINATION_WORKAROUND
let v = prevent_fp64_optimization((s * fp64arithmetic.ONE - a) * fp64arithmetic.ONE);
let err =
prevent_fp64_optimization((a - (s - v) * fp64arithmetic.ONE) *
fp64arithmetic.ONE *
fp64arithmetic.ONE *
fp64arithmetic.ONE) -
prevent_fp64_optimization(b + v);
#else
let v = prevent_fp64_optimization(s - a);
let err = prevent_fp64_optimization(a - (s - v)) - prevent_fp64_optimization(b + v);
#endif
return vec2f(s, err);
}
fn twoSqr(a: f32) -> vec2f {
let prod = prevent_fp64_optimization(a * a);
let aFp64 = split(a);
let highProduct = prevent_fp64_optimization(aFp64.x * aFp64.x);
let crossProduct = prevent_fp64_optimization(2.0 * aFp64.x * aFp64.y);
let lowProduct = prevent_fp64_optimization(aFp64.y * aFp64.y);
#ifdef LUMA_FP64_CODE_ELIMINATION_WORKAROUND
let err =
(prevent_fp64_optimization(highProduct - prod) * fp64arithmetic.ONE +
crossProduct * fp64arithmetic.ONE * fp64arithmetic.ONE) +
lowProduct * fp64arithmetic.ONE * fp64arithmetic.ONE * fp64arithmetic.ONE;
#else
let err = ((prevent_fp64_optimization(highProduct - prod) + crossProduct) + lowProduct);
#endif
return vec2f(prod, err);
}
fn twoProd(a: f32, b: f32) -> vec2f {
let prod = prevent_fp64_optimization(a * b);
let aFp64 = split(a);
let bFp64 = split(b);
let highProduct = prevent_fp64_optimization(aFp64.x * bFp64.x);
let crossProduct1 = prevent_fp64_optimization(aFp64.x * bFp64.y);
let crossProduct2 = prevent_fp64_optimization(aFp64.y * bFp64.x);
let lowProduct = prevent_fp64_optimization(aFp64.y * bFp64.y);
#ifdef LUMA_FP64_CODE_ELIMINATION_WORKAROUND
let err1 = (highProduct - prod) * fp64arithmetic.ONE;
let err2 = crossProduct1 * fp64arithmetic.ONE * fp64arithmetic.ONE;
let err3 = crossProduct2 * fp64arithmetic.ONE * fp64arithmetic.ONE * fp64arithmetic.ONE;
let err4 =
lowProduct *
fp64arithmetic.ONE *
fp64arithmetic.ONE *
fp64arithmetic.ONE *
fp64arithmetic.ONE;
#else
let err1 = highProduct - prod;
let err2 = crossProduct1;
let err3 = crossProduct2;
let err4 = lowProduct;
#endif
let err12InputA = prevent_fp64_optimization(err1);
let err12InputB = prevent_fp64_optimization(err2);
let err12 = prevent_fp64_optimization(err12InputA + err12InputB);
let err123InputA = prevent_fp64_optimization(err12);
let err123InputB = prevent_fp64_optimization(err3);
let err123 = prevent_fp64_optimization(err123InputA + err123InputB);
let err1234InputA = prevent_fp64_optimization(err123);
let err1234InputB = prevent_fp64_optimization(err4);
let err = prevent_fp64_optimization(err1234InputA + err1234InputB);
return vec2f(prod, err);
}
fn sum_fp64(a: vec2f, b: vec2f) -> vec2f {
var s = twoSum(a.x, b.x);
let t = twoSum(a.y, b.y);
s.y = prevent_fp64_optimization(s.y + t.x);
s = quickTwoSum(s.x, s.y);
s.y = prevent_fp64_optimization(s.y + t.y);
s = quickTwoSum(s.x, s.y);
return s;
}
fn sub_fp64(a: vec2f, b: vec2f) -> vec2f {
var s = twoSub(a.x, b.x);
let t = twoSub(a.y, b.y);
s.y = prevent_fp64_optimization(s.y + t.x);
s = quickTwoSum(s.x, s.y);
s.y = prevent_fp64_optimization(s.y + t.y);
s = quickTwoSum(s.x, s.y);
return s;
}
fn mul_fp64(a: vec2f, b: vec2f) -> vec2f {
var prod = twoProd(a.x, b.x);
let crossProduct1 = prevent_fp64_optimization(a.x * b.y);
prod.y = prevent_fp64_optimization(prod.y + crossProduct1);
#ifdef LUMA_FP64_HIGH_BITS_OVERFLOW_WORKAROUND
prod = split2(prod);
#endif
prod = quickTwoSum(prod.x, prod.y);
let crossProduct2 = prevent_fp64_optimization(a.y * b.x);
prod.y = prevent_fp64_optimization(prod.y + crossProduct2);
#ifdef LUMA_FP64_HIGH_BITS_OVERFLOW_WORKAROUND
prod = split2(prod);
#endif
prod = quickTwoSum(prod.x, prod.y);
return prod;
}
fn div_fp64(a: vec2f, b: vec2f) -> vec2f {
let xn = prevent_fp64_optimization(1.0 / b.x);
let yn = mul_fp64(a, vec2f(xn, fp64_runtime_zero()));
let diff = prevent_fp64_optimization(sub_fp64(a, mul_fp64(b, yn)).x);
let prod = twoProd(xn, diff);
return sum_fp64(yn, prod);
}
fn sqrt_fp64(a: vec2f) -> vec2f {
if (a.x == 0.0 && a.y == 0.0) {
return vec2f(0.0, 0.0);
}
if (a.x < 0.0) {
let nanValue = fp64_nan(a.x);
return vec2f(nanValue, nanValue);
}
let x = prevent_fp64_optimization(1.0 / sqrt(a.x));
let yn = prevent_fp64_optimization(a.x * x);
#ifdef LUMA_FP64_CODE_ELIMINATION_WORKAROUND
let ynSqr = twoSqr(yn) * fp64arithmetic.ONE;
#else
let ynSqr = twoSqr(yn);
#endif
let diff = prevent_fp64_optimization(sub_fp64(a, ynSqr).x);
let prod = twoProd(prevent_fp64_optimization(x * 0.5), diff);
#ifdef LUMA_FP64_HIGH_BITS_OVERFLOW_WORKAROUND
return sum_fp64(split(yn), prod);
#else
return sum_fp64(vec2f(yn, 0.0), prod);
#endif
}
`;
//# sourceMappingURL=fp64-arithmetic-wgsl.js.map