UNPKG

quaeratin

Version:

An extended precision floating point library (as per Shewchuk) - precision only limited by overflow / underflow

1,590 lines (1,449 loc) 65.5 kB
// ---------------------------------------------------------------------------
// webpack bootstrap (generated code)
// ---------------------------------------------------------------------------

// The require scope: a plain object that only carries the two runtime helpers
// (`d` and `o`) installed below.
var __webpack_require__ = {};

/* webpack/runtime/define property getters */
(() => {
    // Define getter functions for harmony exports: every own key of
    // `definition` that `exports` does not already own becomes an enumerable
    // getter on `exports`. Because the value is read through a getter, the
    // exported binding is resolved lazily, at access time.
    __webpack_require__.d = (exports, definition) => {
        for(var key in definition) {
            if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {
                Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });
            }
        }
    };
})();

/* webpack/runtime/hasOwnProperty shorthand */
(() => {
    // `o(obj, prop)` is an own-property test that does not rely on
    // `obj.hasOwnProperty` being present or unmodified.
    __webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))
})();

var __webpack_exports__ = {};

// EXPORTS
// Maps the short (minified) export keys to getters for the library functions.
// "reexport" entries come from concatenated sub-modules, "binding" entries are
// declared by the entry module itself.
__webpack_require__.d(__webpack_exports__, {
    "ZS": () => (/* reexport */ bitLength),
    "pr": () => (/* reexport */ doubleToBinaryString),
    "xm": () => (/* reexport */ doubleToOctets),
    "uP": () => (/* reexport */ eAbs),
    "_L": () => (/* binding */ eAdd),
    "lG": () => (/* binding */ eAddDouble),
    "$0": () => (/* reexport */ eCalculate),
    "AM": () => (/* reexport */ eCompare),
    "Zx": () => (/* reexport */ e_compress_eCompress),
    "w1": () => (/* reexport */ eDiff),
    "sZ": () => (/* reexport */ eDiv),
    "lX": () => (/* reexport */ eDivBy2),
    "$A": () => (/* reexport */ eEstimate),
    "yl": () => (/* reexport */ eIntDiv),
    "K9": () => (/* reexport */ eIntPow),
    "lb": () => (/* reexport */ eIsInteger),
    "bN": () => (/* reexport */ eLongDivide),
    "xr": () => (/* binding */ eMult),
    "aB": () => (/* reexport */ eMultBy2),
    "kS": () => (/* reexport */ eMultByNeg2),
    "fX": () => (/* binding */ eMultDouble1),
    "xS": () => (/* binding */ eMultDouble2),
    "ab": () => (/* reexport */ eNegativeOf),
    "Fp": () => (/* reexport */ eProduct),
    "bx": () => (/* reexport */ eRem),
    "qv": () => (/* reexport */ e_sign_eSign),
    "g_": () => (/* reexport */ eSum),
    "XB": () => (/* reexport */ eToBitlength),
    "fN": () => (/* reexport */ eToDd),
    "kq": () => (/* reexport */ expBitLength),
    "Q7": () => (/* reexport */ expansionProduct),
    "ts": () => (/* reexport */ exponent),
    "xK": () => (/* reexport */ fastExpansionSum),
    "Fz": () => (/* reexport */ fastTwoDiff),
    "vz": () => (/* reexport */ fast_two_sum_fastTwoSum),
    "kx": () => (/* reexport */ getHighestSetBit),
    "hK": () => (/* reexport */ getLowestSetBit),
    "Yj": () => (/* reexport */ growExpansion),
    "Ku": () => (/* reexport */ isAdjacent),
    "m5": () => (/* reexport */ isBitAligned),
    "V1": () => (/* reexport */ isNonOverlappingAll),
    "pt": () => (/* reexport */ lsbExponent),
    "Jw": () => (/* reexport */ msbExponent),
    "Gn": () => (/* binding */ operators),
    "IW": () => (/* reexport */ orient2d),
    "Ds": () => (/* reexport */ parseDouble),
    "JO": () => (/* reexport */ parseDoubleDetailed),
    "S4": () => (/* reexport */ reduceSignificand),
    "Fs": () => (/* reexport */ scaleExpansion),
    "R9": () => (/* reexport */ scaleExpansion2),
    "aK": () => (/* reexport */ significand),
    "Vl": () => (/* reexport */ split),
    "BH": () => (/* reexport */ twoDiff),
    "Q6": () => (/* reexport */ two_product_twoProduct),
    "d9": () => (/* reexport */ two_sum_twoSum)
});

;// CONCATENATED MODULE: ./src/double-expansion/e-sign.ts
/**
 * Returns the sign of the given expansion.
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * From Shewchuk: "A nonoverlapping expansion is desirable because it is easy to
 * determine its sign (take the sign of the largest component) ... "
 *
 * @param e A floating point expansion with zeroes eliminated.
 */
/**
 * Returns the most significant (last) component of the given expansion. Its
 * sign is the sign of the whole expansion; note that the component itself is
 * returned, not -1 / 0 / +1.
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * @param e A floating point expansion with zeroes eliminated.
 */
function e_sign_eSign(e) {
    const last = e.length - 1;
    return e[last];
}

;// CONCATENATED MODULE: ./src/double-representation/double-to-octets.ts
// Modified from https://github.com/bartaz/ieee754-visualization/
// under the MIT license
// Copyright 2013 Bartek Szopka (original author)
/**
 * Returns the 8 IEEE-754 bytes composing the given double in big-endian order,
 * i.e. starting with the byte holding the sign bit and ending with the byte
 * holding the lsb of the significand.
 * e.g. 123.456 -> [64, 94, 221, 47, 26, 159, 190, 119]
 *
 * @param number A double
 */
function doubleToOctets(number) {
    const bytes = new Uint8Array(8);
    // `false` selects big-endian byte order.
    new DataView(bytes.buffer).setFloat64(0, number, false);
    return Array.from(bytes);
}

;// CONCATENATED MODULE: ./src/double-representation/double-to-binary-string.ts
// Modified from https://github.com/bartaz/ieee754-visualization/
// under the MIT license
// Copyright 2013 Bartek Szopka (original author)
/**
 * Returns the 64 character string of '0's and '1's representing the given
 * double (sign bit first).
 *
 * @param number A double
 */
function doubleToBinaryString(number) {
    const octets = doubleToOctets(number);
    return octetsToBinaryString(octets);
}

/**
 * Concatenates the 8-bit binary representation of each given byte.
 *
 * @param octets The 8 bytes composing a double (msb first)
 */
function octetsToBinaryString(octets) {
    let bits = '';
    for (const octet of octets) {
        bits += int8ToBinaryString(octet);
    }
    return bits;
}

/**
 * Returns the binary representation of `i`, left-padded with zeros to at
 * least 8 characters, e.g. int8ToBinaryString(8) -> "00001000"
 */
function int8ToBinaryString(i) {
    return i.toString(2).padStart(8, "0");
}

;// CONCATENATED MODULE: ./src/double-representation/parse-double.ts
// Modified from https://github.com/bartaz/ieee754-visualization/
// under the MIT license
// Copyright 2013 Bartek Szopka (original author)
/**
 * Returns the relevant parts of the given IEEE-754 double. The returned
 * exponent has been normalized (i.e. 1023 has been subtracted) and the
 * significand (7 bytes, most significant first) has the hidden bit added if
 * appropriate.
 * See https://github.com/bartaz/ieee754-visualization
 *
 * @param x A double
 */
function parseDouble(x) {
    const [byte0, byte1, ...lowBytes] = doubleToOctets(x);

    const sign = byte0 >> 7;
    const biasedExponent = ((byte0 & 0x7f) << 4) + ((byte1 & 0xf0) >> 4);

    // A biased exponent of 0 means zero or a denormalized (subnormal) number:
    // no hidden bit, and the exponent behaves as if the biased value were 1.
    const isZeroOrSubnormal = biasedExponent === 0;
    const exponent = isZeroOrSubnormal ? -1022 : biasedExponent - 1023;
    const hiddenBit = isZeroOrSubnormal ? 0 : 16;

    // The low nibble of the second byte holds the top 4 fraction bits; the
    // hidden bit sits directly above them.
    const significand = [(byte1 & 0x0f) + hiddenBit, ...lowBytes];

    return { sign, exponent, significand };
}

/**
 * Returns the relevant parts of the given IEEE-754 double.
 * See https://github.com/bartaz/ieee754-visualization.
 * This is a slower version of parseDouble that gives binary string
 * representations of the components.
 *
 * @param x A double
 */
function parseDoubleDetailed(x) {
    // sign{1} exponent{11} fraction{52} === 64 bits (+1 hidden!)
    const bits = doubleToBinaryString(x);
    const sign = bits.slice(0, 1);
    const exponent = bits.slice(1, 12);
    const significand = bits.slice(12);

    const hidden = parseInt(exponent, 2) === 0 ? "0" : "1";

    return {
        full: sign + exponent + hidden + significand,
        sign,
        exponent,
        hidden,
        significand
    };
}

;// CONCATENATED MODULE: ./src/double-representation/significand.ts
/**
 * Returns the significand bytes of the given double with the hidden bit added
 * (when a is not subnormal or 0).
 *
 * @param a A double
 */
function significand(a) {
    const { significand: bytes } = parseDouble(a);
    return bytes;
}

;// CONCATENATED MODULE: ./src/double-representation/get-max-set-bit.ts
/**
 * Returns the index of the lowest set bit of the given value in
 * [1, (2**31)-1], i.e. from 1 up to 2147483647. If no bit is set
 * (input === 0) the result is -Infinity (Math.log2(0)); callers are expected
 * to test the result with Number.isFinite.
 * See https://stackoverflow.com/a/35190288/2010061
 */
function getLowestSetBit_(a) {
    // `a & -a` isolates the lowest set bit.
    const isolated = a & -a;
    return Math.log2(isolated);
}

/**
 * Returns the lowest set bit of the given number's significand (where the lsb
 * is bit 0 and the msb is bit 52). If no bit is set (input === 0 or +-inf or
 * NaN) returns NaN.
 */
/**
 * Returns the lowest set bit of the given number's significand (where the lsb
 * is bit 0 and the msb is bit 52). If no bit is set (input === 0 or +-inf or
 * NaN) returns NaN.
 * See https://stackoverflow.com/a/35190288/2010061
 *
 * @param a A double
 */
function getLowestSetBit(a) {
    if (a === 0 || !Number.isFinite(a)) {
        // There is no lowest set bit
        return NaN;
    }

    // Note: the significand includes the hidden bit!
    const bytes = significand(a);
    const count = bytes.length;

    // Walk from the least significant byte (the last one) upwards.
    for (let fromEnd = 0; fromEnd < count; fromEnd++) {
        const byte = bytes[count - 1 - fromEnd];
        if (byte !== 0) {
            const bit = getLowestSetBit_(byte);
            if (Number.isFinite(bit)) {
                return (8 * fromEnd) + bit;
            }
        }
    }

    return NaN;
}

/**
 * Returns the highest set bit of the given value in [1, 255], i.e. from 1 up
 * to 255. If the input number === 0 returns NaN.
 * See https://stackoverflow.com/a/35190288/2010061
 */
function getHighestSetBit_(a) {
    // Compare against 128, 64, ..., 1 in turn.
    for (let bit = 7; bit >= 0; bit--) {
        if (a >= 2 ** bit) {
            return bit;
        }
    }
    return NaN;
}

/**
 * Returns the highest set bit of the given double's significand (bit 0 is the
 * lsb). If no bit is set (input === 0 or +/-inf or NaN) returns NaN.
 * See https://stackoverflow.com/a/35190288/2010061
 *
 * @param a A double
 */
function getHighestSetBit(a) {
    if (a === 0 || !Number.isFinite(a)) {
        // There is no highest set bit
        return NaN;
    }

    // At this point there must be a highest set bit (always === 52 if the
    // number is not a subnormal).
    const bytes = significand(a);
    const count = bytes.length;

    // The first byte (most significant first) holding any set bit decides.
    const idx = bytes.findIndex((byte) => byte >= 1);
    if (idx === -1) {
        return NaN;
    }

    return (8 * (count - idx - 1)) + getHighestSetBit_(bytes[idx]);
}

;// CONCATENATED MODULE: ./src/double-representation/exponent.ts
/**
 * Returns the normalized exponent of the given number.
 *
 * @param a A double
 */
function exponent(a) {
    const { exponent: normalized } = parseDouble(a);
    return normalized;
}

;// CONCATENATED MODULE: ./src/double-representation/msb-exponent.ts
/**
 * Returns the true exponent of the msb that is set of the given number or
 * NaN if a === 0 or +-inf or NaN.
 */
/**
 * Returns the true exponent of the msb that is set of the given number or
 * NaN if a === 0 or +-inf or NaN.
 *
 * @param a A double
 */
function msbExponent(a) {
    if (a === 0 || !Number.isFinite(a)) {
        return NaN;
    }

    // For all but subnormal numbers the highest set bit is bit 52, so the
    // result is simply the normalized exponent.
    const topBit = getHighestSetBit(a);
    return topBit - 52 + exponent(a);
}

;// CONCATENATED MODULE: ./src/double-expansion/e-compress.ts
/**
 * Returns the result of compressing the given floating point expansion. The
 * input is not modified.
 *
 * * primarily for internal library use
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * Theorem 23 (Shewchuk): Let e = sum_(i=1)^m(e_i) be a nonoverlapping
 * expansion of m p-bit components, where m >= 3. Suppose that the components of
 * e are sorted in order of increasing magnitude, except that any of the e_i may
 * be zero. Then the following algorithm will produce a nonoverlapping expansion
 * (nonadjacent if round-to even tiebreaking is used) such that
 * h = sum_(i=1)^n(h_i) = e, where the components h_i are in order of increasing
 * magnitude. If h != 0, none of the h_i will be zero. Furthermore, the largest
 * component h_n approximates h with an error smaller than ulp(h_n).
 *
 * @param e A floating point expansion
 */
function e_compress_eCompress(e) {
    const out = e.slice();
    const count = out.length;
    if (count === 1) {
        return out;
    }

    // Pass 1: traverse from the largest component to the smallest, folding
    // each component into the running sum with a fast-two-sum. Whenever there
    // is roundoff, the sum is parked at the top of `out` and the roundoff
    // becomes the new running value.
    let acc = out[count - 1];
    let bottom = count;
    let idx = count - 2;
    while (idx >= 0) {
        const hi = acc;
        const lo = out[idx];
        acc = hi + lo;
        const roundoff = lo - (acc - hi);
        if (roundoff) {
            bottom -= 1;
            out[bottom] = acc;
            acc = roundoff;
        }
        idx -= 1;
    }

    // Pass 2: traverse the parked values from smallest to largest, writing the
    // non-zero roundoff terms to the front of `out`.
    let top = 0;
    for (let k = bottom; k < count; k++) {
        const hi = out[k];
        const lo = acc;
        acc = hi + lo;
        const roundoff = lo - (acc - hi);
        if (roundoff) {
            out[top] = roundoff;
            top += 1;
        }
    }

    out[top] = acc;
    out.length = top + 1;

    return out;
}

;// CONCATENATED MODULE: ./src/basic/reduce-significand.ts
/**
 * Truncates a floating point value's significand and returns the result.
 * Similar to split, but with the ability to specify the number of bits to keep.
 *
 * Theorem 17 (Veltkamp-Dekker): Let a be a p-bit floating-point number, where
 * p >= 3. Choose a splitting point s such that p/2 <= s <= p-1.
 */
/**
 * Truncates a floating point value's significand and returns the result.
 * Similar to split, but with the ability to specify the number of bits to keep.
 *
 * Theorem 17 (Veltkamp-Dekker): Let a be a p-bit floating-point number, where
 * p >= 3. Choose a splitting point s such that p/2 <= s <= p-1. Then the
 * following algorithm will produce a (p-s)-bit value a_hi and a
 * nonoverlapping (s-1)-bit value a_lo such that abs(a_hi) >= abs(a_lo) and
 * a = a_hi + a_lo.
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * @param a a double
 * @param bits the number of significand bits to leave intact
 */
function reduceSignificand(a, bits) {
    const shift = 53 - bits;
    const splitter = 2 ** shift + 1;
    const scaled = splitter * a;

    return scaled - (scaled - a);
}

;// CONCATENATED MODULE: ./src/double-expansion/e-to-bitlength.ts
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const sign = e_sign_eSign;
const compress = e_compress_eCompress;
/**
 * Returns a floating point expansion accurate to the given number of bits.
 * Extraneous bits are discarded.
 *
 * @param a a floating point expansion
 * @param l the number of accurate bits to keep
 */
// TODO - make faster
function eToBitlength(a, l) {
    const compact = compress(a);
    if (sign(compact) === 0) {
        return [0];
    }

    const topIndex = compact.length - 1;
    const maxMsb = msbExponent(compact[topIndex]);

    // Starting at the most significant component, move down for as long as
    // the next component's msb still lies within `l` bits of the top msb.
    let msb = maxMsb;
    let cut = topIndex;
    for (; cut > 0; cut--) {
        const nextMsb = msbExponent(compact[cut - 1]);
        if (maxMsb - nextMsb > l) {
            break;
        }
        msb = nextMsb;
    }

    // The lowest kept component is truncated to the bits still in budget.
    const keepBits = Math.min(l - (maxMsb - msb), 53);
    const result = compact.slice(cut);
    result[0] = reduceSignificand(compact[cut], keepBits);

    return result;
}

;// CONCATENATED MODULE: ./src/double-expansion/e-estimate.ts
/**
 * Returns the result of the given floating point expansion rounded to a double
 * floating point number.
 *
 * The result is within 1 ulps of the actual value, e.g. imagine the worst case
 * situation where we add (in 4dot4) 1111.1000 + 0.000011111111... The result
 * will be 1111.1000 whereas as the correct result should be 1111.1001 and we
 * thus lost 1 ulp of accuracy. It does not matter that the expansion contain
 * several floats since none is overlapping.
 */
/**
 * Returns the given floating point expansion rounded to a double, obtained by
 * adding the components in order (smallest first).
 *
 * The result is within 1 ulp of the actual value.
 *
 * See Shewchuk https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf
 *
 * @param e a floating point expansion
 */
function eEstimate(e) {
    let total = e[0];
    let idx = 1;
    while (idx < e.length) {
        total += e[idx];
        idx += 1;
    }

    return total;
}

;// CONCATENATED MODULE: ./src/double-expansion/fast-expansion-sum.ts
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const fast_expansion_sum_compress = (/* unused pure expression or super */ null && (eCompress));
/**
 * Returns the result of adding two expansions.
 *
 * Theorem 13: Let e = sum_(i=1)^m(e_i) and f = sum_(i=1)^n(f_i) be strongly
 * nonoverlapping expansions of m and n p-bit components, respectively, where
 * p >= 4. Suppose that the components of both e and f are sorted in order of
 * increasing magnitude, except that any of the e_i or f_i may be zero. On a
 * machine whose arithmetic uses the round-to-even rule, the following algorithm
 * will produce a strongly nonoverlapping expansion h such that
 * sum_(i=1)^(m+n)(e_i + f_i) = e + f, where the components of h are also in
 * order of increasing magnitude, except that any of the h_i may be zero.
 */
/**
 * Returns the result of adding two expansions (with zero elimination).
 *
 * Theorem 13: Let e = sum_(i=1)^m(e_i) and f = sum_(i=1)^n(f_i) be strongly
 * nonoverlapping expansions of m and n p-bit components, respectively, where
 * p >= 4. Suppose that the components of both e and f are sorted in order of
 * increasing magnitude, except that any of the e_i or f_i may be zero. On a
 * machine whose arithmetic uses the round-to-even rule, the following algorithm
 * will produce a strongly nonoverlapping expansion h such that
 * sum_(i=1)^(m+n)(e_i + f_i) = e + f, where the components of h are also in
 * order of increasing magnitude, except that any of the h_i may be zero.
 *
 * See https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf
 *
 * @param e a floating point expansion
 * @param f another floating point expansion
 */
function fastExpansionSum(e, f) {
    //---- Step 1: merge e and f by increasing magnitude (inlined `merge`).
    const lenE = e.length;
    const lenF = f.length;
    const merged = [];
    let i = 0;
    let j = 0;
    while (i < lenE && j < lenF) {
        const ei = e[i];
        const fj = f[j];
        if (ei === 0) {
            i++;
        }
        else if (fj === 0) {
            j++;
        }
        else if (Math.abs(ei) <= Math.abs(fj)) {
            merged.push(ei);
            i++;
        }
        else {
            merged.push(fj);
            j++;
        }
    }
    for (; i < lenE; i++) {
        merged.push(e[i]);
    }
    for (; j < lenF; j++) {
        merged.push(f[j]);
    }

    const count = merged.length;
    if (count === 0) {
        return [0];
    }
    if (count === 1) {
        return merged;
    }

    //---- Step 2: accumulate, keeping only the non-zero roundoff terms.
    const h = [];

    // Inlined fastTwoSum(merged[1], merged[0]) - valid since the merged
    // sequence is ordered by increasing magnitude.
    const larger = merged[1];
    const smaller = merged[0];
    let q = larger + smaller;
    const firstErr = smaller - (q - larger);
    if (firstErr !== 0) {
        h.push(firstErr);
    }

    for (let k = 2; k < count; k++) {
        // Inlined twoSum(q, merged[k])
        const term = merged[k];
        const sum = q + term;
        const virt = sum - q;
        const err = (q - (sum - virt)) + (term - virt);
        if (err !== 0) {
            h.push(err);
        }
        q = sum;
    }

    // The final sum is kept if non-zero, or if it is the only component.
    if (q !== 0 || h.length === 0) {
        h.push(q);
    }

    return h;
}

/**
 * Returns the result of merging an expansion e and f into a single expansion,
 * in order of nondecreasing magnitude (possibly with interspersed zeros).
 */
/**
 * Returns the result of merging an expansion e and f into a single expansion,
 * in order of nondecreasing magnitude.
 *
 * Zero components are skipped while both inputs still have components left;
 * the remainder of the longer input is copied as is. If nothing was collected
 * `[0]` is returned.
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * @param e a floating point expansion
 * @param f another floating point expansion
 */
function merge(e, f) {
    const lenE = e.length;
    const lenF = f.length;
    const out = [];
    let i = 0;
    let j = 0;

    while (i < lenE && j < lenF) {
        const ei = e[i];
        const fj = f[j];
        if (ei === 0) {
            i++;
        }
        else if (fj === 0) {
            j++;
        }
        else if (Math.abs(ei) <= Math.abs(fj)) {
            // On ties the component of `e` goes first.
            out.push(ei);
            i++;
        }
        else {
            out.push(fj);
            j++;
        }
    }

    for (; i < lenE; i++) {
        out.push(e[i]);
    }
    for (; j < lenF; j++) {
        out.push(f[j]);
    }

    return out.length === 0 ? [0] : out;
}

;// CONCATENATED MODULE: ./src/double-expansion/scale-expansion.ts
const f = 134217729; // 2**27 + 1;
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const tp = (/* unused pure expression or super */ null && (twoProduct));
const ts = (/* unused pure expression or super */ null && (twoSum));
const fts = (/* unused pure expression or super */ null && (fastTwoSum));
const scale_expansion_compress = (/* unused pure expression or super */ null && (eCompress));
/**
 * Returns the result of multiplying an expansion by a double.
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * Theorem 19 (Shewchuk): Let e = sum_(i=1)^m(e_i) be a nonoverlapping expansion
 * of m p-bit components, and const b be a p-bit value where p >= 4. Suppose that
 * the components of e are sorted in order of increasing magnitude, except that
 * any of the e_i may be zero. Then the following algorithm will produce a
 * nonoverlapping expansion h such that h = sum_(i=1)^(2m)(h_i) = be, where the
 * components of h are also in order of increasing magnitude, except that any of
 * the h_i may be zero. Furthermore, if e is nonadjacent and round-to-even
 * tiebreaking is used, then h is non-adjacent.
 */
/**
 * Returns the result of multiplying an expansion by a double (with zero
 * elimination).
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * Theorem 19 (Shewchuk): Let e = sum_(i=1)^m(e_i) be a nonoverlapping expansion
 * of m p-bit components, and let b be a p-bit value where p >= 4. Suppose that
 * the components of e are sorted in order of increasing magnitude, except that
 * any of the e_i may be zero. Then the following algorithm will produce a
 * nonoverlapping expansion h such that h = sum_(i=1)^(2m)(h_i) = be, where the
 * components of h are also in order of increasing magnitude, except that any of
 * the h_i may be zero. Furthermore, if e is nonadjacent and round-to-even
 * tiebreaking is used, then h is non-adjacent.
 *
 * An empty expansion is treated as zero and yields `[0]`.
 *
 * @param e a double floating point expansion
 * @param b a double
 * @returns the product as an expansion with zero components removed (a single
 * `[0]` if the product is zero)
 */
function scaleExpansion(e, b) {
    const m = e.length;
    if (m === 0) {
        // Nothing to scale: an expansion without components represents 0.
        return [0];
    }

    // Veltkamp splitter for doubles, 2**27 + 1.
    const splitter = 134217729;

    // Split b into high and low halves once; b does not change in the loop.
    const d = splitter * b;
    const bh = d - (d - b);
    const bl = b - bh;

    const h = [];

    // Inlined [h[0], q] = twoProduct(e[0], b)
    const a = e[0];
    let q = a * b;
    const c = splitter * a;
    const ah = c - (c - a);
    const al = a - ah;
    const hh = (al * bl) - ((q - (ah * bh)) - (al * bh) - (ah * bl));
    if (hh !== 0) {
        h.push(hh);
    }

    for (let i = 1; i < m; i++) {
        // Inlined [t, T] = twoProduct(e[i], b)
        const a = e[i];
        const T = a * b;
        const c = splitter * a;
        const ah = c - (c - a);
        const al = a - ah;
        const t = (al * bl) - ((T - (ah * bh)) - (al * bh) - (ah * bl));

        // Inlined [low, x] = twoSum(q, t)
        const x = q + t;
        const bv = x - q;
        const low = (q - (x - bv)) + (t - bv);
        if (low !== 0) {
            h.push(low);
        }

        // Inlined [mid, q] = fastTwoSum(T, x)
        const xx = T + x;
        const mid = x - (xx - T);
        if (mid !== 0) {
            h.push(mid);
        }

        q = xx;
    }

    // The final sum is kept if non-zero, or if it is the only component.
    if (q !== 0 || h.length === 0) {
        h.push(q);
    }

    return h;
}

/**
 * Returns the result of multiplying an expansion by a double.
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * Theorem 19 (Shewchuk): Let e = sum_(i=1)^m(e_i) be a nonoverlapping expansion
 * of m p-bit components, and const b be a p-bit value where p >= 4. Suppose that
 * the components of e are sorted in order of increasing magnitude, except that
 * any of the e_i may be zero.
 */
/**
 * Returns the result of multiplying a double by an expansion. Identical to
 * `scaleExpansion` except for the parameter order, so it simply delegates
 * instead of carrying a second copy of the algorithm.
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * @param b a double
 * @param e a double floating point expansion
 * @returns the product `b * e` as an expansion with zero components removed
 */
function scaleExpansion2(b, e) {
    return scaleExpansion(e, b);
}

;// CONCATENATED MODULE: ./src/double-expansion/expansion-product.ts
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const multByDouble = scaleExpansion;
const add = fastExpansionSum;
const expansion_product_compress = (/* unused pure expression or super */ null && (eCompress));
/**
 * Returns the product of two double floating point expansions.
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * As per Shewchuk in the above paper: "To find the product of two expansions
 * e and f, use SCALE-EXPANSION (with zero elimination) to form the expansions
 * ef_1, ef_2, ..., then sum these using a distillation tree."
 *
 * A distillation tree used with fastExpansionSum will give O(k*log k) vs O(k^2)
 * operations.
 *
 * Implemented naively and not as described by Shewchuk (i.e. the algorithm
 * takes O(k^2) operations): f is scaled by each component of e in turn and the
 * partial products are accumulated one after the other.
 *
 * @param e a double floating point expansion
 * @param f another double floating point expansion
 */
function expansionProduct(e, f) {
    let sum = [0];
    for (const component of e) {
        sum = add(sum, multByDouble(f, component));
    }

    return sum;
}

;// CONCATENATED MODULE: ./src/double-expansion/e-negative-of.ts
/**
 * Returns the negative of the given floating point expansion as a new array
 * (every component negated).
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * @param e a floating point expansion
 */
function eNegativeOf(e) {
    return Array.from(e, (component) => -component);
}

;// CONCATENATED MODULE: ./src/double-expansion/e-diff.ts
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const negativeOf = eNegativeOf;
const e_diff_add = fastExpansionSum;
/**
 * Returns the difference between two floating point expansions, i.e. e - f,
 * computed as e + (-f).
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * @param e a floating point expansion
 * @param f another floating point expansion
 */
function eDiff(e, f) {
    return e_diff_add(e, negativeOf(f));
}

;// CONCATENATED MODULE: ./src/double-representation/bit-length.ts
/**
 * Returns the bit-length of the significand of the given number in such a way
 * that trailing zeros are not counted.
 */
/**
 * Returns the bit-length of the significand of the given number in such a way
 * that trailing zeros are not counted.
 *
 * @param a A double precision floating point number
 */
function bitLength(a) {
    if (a === 0) {
        return 0;
    }

    const high = getHighestSetBit(a);
    const low = getLowestSetBit(a);

    return high - low + 1;
}

/**
 * Returns the bit-length of the significand of the given floating point
 * expansion in such a way that trailing zeros are not counted.
 *
 * * precondition: subnormals not currently supported
 *
 * @param a A double precision floating point expansion
 */
function expBitLength(a) {
    const compact = e_compress_eCompress(a);
    if (e_sign_eSign(compact) === 0) {
        return 0;
    }

    // After compression the last component is the most significant double and
    // the first one the least significant.
    const most = compact[compact.length - 1];
    const least = compact[0];
    const exponentSpan = exponent(most) - exponent(least);

    return exponentSpan + (53 - getLowestSetBit(least));
}

;// CONCATENATED MODULE: ./src/double-expansion/e-div.ts
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const mult = expansionProduct;
const toBitlength = eToBitlength;
const e_div_bitLength = expBitLength;
const diff = eDiff;
const estimate = eEstimate;
/**
 * Returns the result of a/b using Goldschmidt division.
 *
 * The result will only be exact if b|a, i.e. if b divides a exactly, else the
 * result will be rounded to the longest bitlength between a and b.
 *
 * @param a the numerator
 * @param b the denominator
 *
 * @param expansionLength the bitlength/53 of the final result, e.g. 1 means
 * standard double precision, 2 means double-double, etc up to a max of about 20 at
 * which point underflow cease precision improvement. If the division is known
 * to be exact beforehand (such as in the pseudo remainder sequence algorithm)
 * then set expansionLength === 0 and an exact division will be done.
 */
/**
 * Returns the result of N/D using Goldschmidt division.
 *
 * The result will only be exact if D|N, i.e. if D divides N exactly, else the
 * result is truncated to (at most) the `expansionLength` most significant
 * components.
 *
 * @param N the numerator (a floating point expansion)
 * @param D the denominator (a floating point expansion)
 * @param expansionLength the bitlength/53 of the final result, e.g. 1 means
 * standard double precision, 2 means double-double, etc up to a max of about
 * 20 at which point underflow ceases precision improvement. If the division is
 * known to be exact beforehand (such as in the pseudo remainder sequence
 * algorithm) then pass a falsy value (0 or undefined) and an exact division
 * will be done.
 * @returns the quotient as a floating point expansion
 */
// TODO - test this function properly or replace with a better one
function eDiv(N, D, expansionLength) {
    let D_ = D;
    let N_ = N;
    let targetLength = expansionLength;
    let exact = false;
    let resultBitlengthUpperBound = 0;

    if (!targetLength) {
        const bitlengthN = e_div_bitLength(N_);
        const bitlengthD = e_div_bitLength(D_);
        // resultBitlengthUpperBound is only valid if the division is known
        // to be exact
        resultBitlengthUpperBound = bitlengthN - bitlengthD + 1;
        targetLength = (resultBitlengthUpperBound / 53) + 1;
        exact = true;
    }

    let F = [1 / estimate(D_)]; // Initial guess - out by 1/2 ulps

    // The precision bitlength doubles on each iteration, hence `i *= 2`.
    for (let i = 1;; i *= 2) {
        N_ = mult(N_, F);

        if (i > targetLength) {
            // We now have roughly double the needed precision - we actually
            // only require about the precision and then round properly - this
            // could be implemented in the future.
            if (exact) {
                // We must throw away bits known to be zero.
                // Any bits > targetLength * 53 must be thrown away as they
                // are wrong - all other bits are exact.
                return toBitlength(N_, resultBitlengthUpperBound);
            }

            // Returning only significant bits helps with sign determination
            // later on. The start index is clamped at 0: a negative start
            // would be counted from the end of the array by `slice` and drop
            // low-order components whenever the quotient has fewer components
            // than requested.
            const start = Math.max(0, N_.length - targetLength);
            return N_.slice(start);
        }

        D_ = mult(D_, F);
        F = diff([2], D_);
    }
}

;// CONCATENATED MODULE: ./src/double-expansion/grow-expansion.ts
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const grow_expansion_compress = (/* unused pure expression or super */ null && (eCompress));
/**
 * Returns the result of adding a double to an expansion.
 *
 * Let e be a nonoverlapping expansion of m p-bit components, and let b be a
 * p-bit value where p >= 3.
 */
/**
 * Returns the result of adding a double to an expansion (with zero
 * elimination).
 *
 * Let e be a nonoverlapping expansion of m p-bit components, and let b be a
 * p-bit value where p >= 3. Suppose that the components e_1, ..., e_m are
 * sorted in order of *increasing* magnitude, except that any of the ei may be
 * zero.
 * Then the following algorithm will produce a nonoverlapping expansion such
 * that h = sum_i(h_i) = e + b, where the components h_1, ..., h_(m+1) are also
 * in order of increasing magnitude, except that any of the h_i may be zero.
 * Furthermore, if e is nonadjacent and round-to-even tiebreaking is used, then
 * h is nonadjacent.
 * See https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf
 *
 * @param e A floating point expansion
 * @param b A double
 */
function growExpansion(e, b) {
    const h = [];
    let q = b;

    for (const component of e) {
        // Inlined twoSum(q, component) - note: twoSum and not fastTwoSum,
        // since nothing is known about the relative magnitudes.
        const sum = q + component;
        const virt = sum - q;
        const err = (q - (sum - virt)) + (component - virt);
        if (err !== 0) {
            h.push(err);
        }
        q = sum;
    }

    // The final sum is kept if non-zero, or if it is the only component.
    if (q !== 0 || h.length === 0) {
        h.push(q);
    }

    return h;
}

;// CONCATENATED MODULE: ./src/basic/two-sum.ts
/**
 * Returns the exact result of adding two doubles.
 *
 * * the resulting array is the reverse of the standard twoSum in the literature.
 *
 * Theorem 7 (Knuth): Let a and b be p-bit floating-point numbers. Then the
 * following algorithm will produce a nonoverlapping expansion x + y such that
 * a + b = x + y, where x is an approximation to a + b and y is the roundoff
 * error in the calculation of x.
 */
/**
 * Returns the exact result of adding two doubles as `[roundoff, sum]`.
 *
 * * the resulting array is the reverse of the standard twoSum in the literature.
 *
 * Theorem 7 (Knuth): Let a and b be p-bit floating-point numbers. Then the
 * following algorithm will produce a nonoverlapping expansion x + y such that
 * a + b = x + y, where x is an approximation to a + b and y is the roundoff
 * error in the calculation of x.
 *
 * See https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf
 *
 * @param a a double
 * @param b another double
 */
function two_sum_twoSum(a, b) {
    const sum = a + b;
    const bVirtual = sum - a;
    const aVirtual = sum - bVirtual;
    const roundoff = (a - aVirtual) + (b - bVirtual);

    return [roundoff, sum];
}
// inlined
//const R = a + b; const _ = R - a; const r = (a - (R - _)) + (b - _); return [r,R]

;// CONCATENATED MODULE: ./src/double-expansion/e-sum.ts
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const e_sum_ts = two_sum_twoSum;
const addDouble = growExpansion;
const e_sum_add = fastExpansionSum;
/**
 * Returns the result of summing an array of floating point expansions.
 *
 * * The result is exact in the form of a non-overlapping floating point
 * expansion.
 *
 * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
 *
 * @param terms An array of numbers to be summed; A term is represented by a
 * floating point expansion.
 */
// The terms parameter was chosen to always be expansions in order to keep the
// function monomorphic, but whether it's really worth it I am not sure.
// Sums the given expansions left to right, starting from [0]. The cheapest
// routine is picked per step, based on whether the running total and/or the
// term consist of a single double.
function eSum(terms) {
    let total = [0];
    for (let i = 0; i < terms.length; i++) {
        const term = terms[i];
        // add
        if (term.length === 1) {
            if (total.length === 1) {
                // double + double
                total = e_sum_ts(total[0], term[0]);
            }
            else {
                // expansion + double
                total = addDouble(total, term[0]);
            }
        }
        else {
            if (total.length === 1) {
                // double + expansion
                total = addDouble(term, total[0]);
            }
            else {
                // expansion + expansion
                total = e_sum_add(total, term);
            }
        }
    }
    return total;
}
;// CONCATENATED MODULE: ./src/double-expansion/e-long-divide.ts
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const e_long_divide_eNegativeOf = eNegativeOf;
const e_long_divide_fastExpansionSum = fastExpansionSum;
const e_long_divide_eCompress = e_compress_eCompress;
const e_long_divide_growExpansion = growExpansion;
const e_long_divide_eSum = eSum;
const e_long_divide_scaleExpansion = scaleExpansion;
const e_long_divide_eDiff = eDiff;
const e_long_divide_sign = Math.sign;
/**
 * Divides the expansion N by the expansion D and returns both the quotient and
 * the remainder as expansions.
 *
 * The quotient is built up iteratively: partial quotients are estimated from
 * the components of N and the most significant double of D, the corresponding
 * multiples of D are subtracted from N, and this is repeated until a pass
 * contributes no further partial quotient. Finally the signs are fixed so that
 * a non-zero remainder has the same sign as N.
 *
 * NOTE(review): presumably N and D are expected to represent integers (the
 * integer division / remainder wrappers state this as precondition) - confirm.
 *
 * @param N the numerator (a floating point expansion)
 * @param D the denominator (a floating point expansion)
 * @returns an object `{ div, rem }` holding quotient and remainder
 * @throws Error ('division by zero') if D compresses to the single component 0
 */
function eLongDivide(N, D) {
    N = e_long_divide_eCompress(N);
    D = e_long_divide_eCompress(D);
    // get the most significant double
    // out by at most 1 ulp, exact if d < MAX_SAFE_INT
    const d = D[D.length - 1];
    // trivial cases
    if (D.length === 1) {
        if (d === 0) {
            throw new Error('division by zero');
        }
        if (d === 1) {
            return { div: N, rem: [0] };
        }
        if (d === -1) {
            return { div: e_long_divide_eNegativeOf(N), rem: [0] };
        }
    }
    const signN = e_long_divide_sign(N[N.length - 1]);
    if (signN === 0) {
        return { div: [0], rem: [0] };
    }
    const signD = e_long_divide_sign(d);
    // Partial quotients (plain doubles) collected over all passes.
    const divs = [];
    // Number of partial quotients after the previous pass; used to detect a
    // pass that made no progress.
    let oldLen = 0;
    while (true) {
        // Multiples of D to be subtracted from N after this pass.
        const rems = [];
        // loop from big `n[i]` to small `n[i]`
        for (let i = N.length - 1; i >= 0; i--) {
            const n = N[i];
            // `n % d` is the exact rem (for rem < MAX_SAFE_INTEGER) but is preliminary
            // as it is subject to round-off for rem > MAX_SAFE_INTEGER; thus out by at
            // most 1/2 ulp
            // Due to roundoff (and the fact we're using `d` and not `D`!), `div` does
            // not necessarily represent the exact quotient.
            const div = Math.round((n - (n % d)) / d);
            // get the remainder by calculating `rem = n - d*div`
            rems.push(e_long_divide_scaleExpansion(D, div)); // exact
            if (div === 0) {
                break;
            }
            divs.push(div);
        }
        N = e_long_divide_eCompress(e_long_divide_eDiff(N, e_long_divide_eSum(rems)));
        // No new partial quotient in this pass => done.
        if (oldLen === divs.length) {
            break;
        }
        oldLen = divs.length;
    }
    // What is left of N is the (preliminary) remainder.
    let rem = N;
    // Add up all partial quotients exactly.
    let div = [0];
    for (let i = 0; i < divs.length; i++) {
        div = e_long_divide_growExpansion(div, divs[i]);
    }
    div = e_long_divide_eCompress(div);
    //----------------------
    // fix signs (possibly)
    //----------------------
    const signRem = e_long_divide_sign(rem[rem.length - 1]);
    // We must have:
    // sign(div) === sign(n) * sign(d)
    // sign(rem) === sign(n)
    // At this point: `signN !== 0` and `signD !== 0`
    if (signRem !== 0 && signRem !== signN) {
        if (signN > 0) {
            if (signD > 0) {
                // div = div - 1 (div is positive)
                // rem = rem + D
                div = e_long_divide_growExpansion(div, -1);
                rem = e_long_divide_fastExpansionSum(rem, D);
            }
            else {
                // div = div + 1 (div is negative)
                // rem = rem - D
                div = e_long_divide_growExpansion(div, +1);
                rem = e_long_divide_fastExpansionSum(rem, e_long_divide_eNegativeOf(D));
            }
        }
        else if (signN < 0) {
            if (signD > 0) {
                // div = div + 1 (div is negative)
                // rem = rem - D
                div = e_long_divide_growExpansion(div, +1);
                rem = e_long_divide_fastExpansionSum(rem, e_long_divide_eNegativeOf(D));
            }
            else {
                // div = div - 1 (div is positive)
                // rem = rem + D
                div = e_long_divide_growExpansion(div, -1);
                rem = e_long_divide_fastExpansionSum(rem, D);
            }
        }
    }
    return { div, rem };
}
;// CONCATENATED MODULE: ./src/double-expansion/e-int-div.ts
// We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗
const e_int_div_eLongDivide = eLongDivide;
/**
 * Returns the result of the integer division a/b.
 */
* * * **precondition:** a and b must be integers, b !== 0 */ function eIntDiv(a, b) { return e_int_div_eLongDivide(a, b).div; } ;// CONCATENATED MODULE: ./src/double-expansion/e-rem.ts // We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗ const e_rem_eLongDivide = eLongDivide; /** * Returns a % b * * * **precondition:** a and b must be integers, b !== 0 */ function eRem(a, b) { return e_rem_eLongDivide(a, b).rem; } ;// CONCATENATED MODULE: ./src/double-expansion/e-compare.ts /** * Returns 0 if a === b, a +tive value if a > b or a negative value if a < b. * * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf) * * "The easiest way to compare two expansions is to subtract one from the other, * and test the sign of the result. An expansion’s sign can be easily tested * because of the nonoverlapping property; simply check the sign of the * expansion's most significant nonzero component..." * * @param a a floating point expansion * @param b another floating point expansion */ function eCompare(a, b) { return e_sign_eSign(eDiff(a, b)); } ;// CONCATENATED MODULE: ./src/double-expansion/e-abs.ts // We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗ const e_abs_sign = (/* unused pure expression or super */ null && (eSign)); const e_abs_negativeOf = eNegativeOf; /** * Returns the absolute value of the given floating point expansion. * * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf) * * @param e a floating point expansion */ function eAbs(e) { if (e[e.length - 1] < 0) { return e_abs_negativeOf(e); } return e; } ;// CONCATENATED MODULE: ./src/basic/fast-two-diff.ts /** * Returns the difference and exact error of subtracting two floating point * numbers. * Uses an EFT (error-free transformation), i.e. a-b === x+y exactly. * The returned result is a non-overlapping expansion (smallest value first!). 
* * Precondition: abs(a) >= abs(b) - A fast test that can be used is * (a > b) === (a > -b) * * See https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf */ function fastTwoDiff(a, b) { const x = a - b; const y = (a - x) - b; return [y, x]; } ;// CONCATENATED MODULE: ./src/basic/fast-two-sum.ts /** * Returns the sum and exact error of adding two floating point numbers. * Uses an EFT (error-free transformation), i.e. a+b === x+y exactly. * The returned sum is a non-overlapping expansion (smallest value first!). * * Precondition: abs(a) >= abs(b) - A fast test that can be used is * (a > b) === (a > -b) * * See https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf */ function fast_two_sum_fastTwoSum(a, b) { const x = a + b; return [b - (x - a), x]; } // inlined //const R = a + b; const r = b - (R - a); return [r, R]; ;// CONCATENATED MODULE: ./src/double-expansion/e-mult-by-2.ts /** * Returns the result of multiplying a floating point expansion by 2. * * **error free** * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf) * * @param e a floating point expansion */ function eMultBy2(e) { const e_ = []; for (let i = 0; i < e.length; i++) { e_.push(2 * e[i]); } return e_; } ;// CONCATENATED MODULE: ./src/double-expansion/e-mult-by-neg-2.ts /** * Multiply a floating point expansion by -2. * * **error free** * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf) * * @param e a floating point expansion */ function eMultByNeg2(e) { const e_ = []; for (let i = 0; i < e.length; i++) { e_.push(-2 * e[i]); } return e_; } ;// CONCATENATED MODULE: ./src/double-expansion/e-div-by-2.ts /** * Returns the result of dividing a floating point expansion by 2. 
* * **error free** * * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf) * * @param e a floating point expansion */ function eDivBy2(e) { const e_ = []; for (let i = 0; i < e.length; i++) { e_.push(0.5 * e[i]); } return e_; } ;// CONCATENATED MODULE: ./src/basic/split.ts /** * === Math.ceil(p/2) where p is the # of significand bits in a double === 53. */ const split_f = 134217729; // 2**27 + 1; /** * Returns the result of splitting a double into 2 26-bit doubles. * * Theorem 17 (Veltkamp-Dekker): Let a be a p-bit floating-point number, where * p >= 3. Choose a splitting point s such that p/2 <= s <= p-1. Then the * following algorithm will produce a (p-s)-bit value a_hi and a * nonoverlapping (s-1)-bit value a_lo such that abs(a_hi) >= abs(a_lo) and * a = a_hi + a_lo. * * see e.g. [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf) * @param a A double floating point number */ function split(a) { const c = split_f * a; const a_h = c - (c - a); const a_l = a - a_h; return [a_h, a_l]; } // inlined - input a, output a_h, a_l // const c = f * a; const a_h = c - (c - a); const a_l = a - a_h; return [a_h, a_l]; ;// CONCATENATED MODULE: ./src/basic/two-diff.ts /** * Returns the exact result of subtracting b from a (as a floating point * expansion). * @param a * @param b */ function twoDiff(a, b) { const x = a - b; const bvirt = a - x; const y = (a - (x + bvirt)) + (bvirt - b); return [y, x]; } ;// CONCATENATED MODULE: ./src/basic/two-product.ts const two_product_f = 134217729; // 2**27 + 1; /** * Returns the exact result of multiplying two doubles. * * * the resulting array is the reverse of the standard twoSum in the literature. * * Theorem 18 (Shewchuk): Let a and b be p-bit floating-point numbers, where * p >= 6. Then the following algorithm will produce a nonoverlapping expansion * x + y such that ab = x + y, where x is an approximation to ab and y * represents the roundoff error in the calculation of x. 
Furthermore, if * round-to-even tiebreaking is used, x and y are non-adjacent. * * See https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf * @param a A double * @param b Another double */ function two_product_twoProduct(a, b) { const x = a * b; //const [ah, al] = split(a); const c = two_product_f * a; const ah = c - (c - a); const al = a - ah; //const [bh, bl] = split(b); const d = two_product_f * b; const bh = d - (d - b); const bl = b - bh; const y = (al * bl) - ((x - (ah * bh)) - (al * bh) - (ah * bl)); //const err1 = x - (ah * bh); //const err2 = err1 - (al * bh); //const err3 = err2 - (ah * bl); //const y = (al * bl) - err3; return [y, x]; } ;// CONCATENATED MODULE: ./src/double-representation/is-bit-aligned.ts /** * Returns true if the given number is bit-aligned in the sense that its a * multiple of a given power of 2, say e, and such that the number, say a, * conforms to: a/2^e < 2^(l-e), where l is the max allowed bit length. * This essentially means the numbers act somewhat like fixed-point numbers * which can drastically speed up some geometric algorithms and also reduce * their complexity. 
* * Visually: * These numbers (a,b and c) are bit aligned with e === 3 and max * bitlength === 6: * a -> 00|101100|000 * b -> 00|000100|000 * c -> 00|110111|000 * These are not * a -> 01|101100|000 * b -> 00|000100|000 * These are not * a -> 00|101100|000 * b -> 00|000100|100 * These are not * a -> 00|101100|100 * b -> 00|000100|100 * @param as An array of numbers to check * @param maxBitLength The max allowed bitlength * @param gridSpacingExponent The grid spacing === 1^gridSpacingExponent */ function isBitAligned(a, maxBitLength, gridSpacingExponent) { if (a === 0) { return true; } const e = exponent(a); const maxSetBit = getHighestSetBit(a) - 52 + e; const minSetBit = getLowestSetBit(a) - 52 + e; const minBitBigEnough = minSetBit >= gridSpacingExponent; const maxBitSmallEnough = maxSetBit <= maxBitLength - 1 + gridSpacingExponent; return minBitBigEnough && maxBitSmallEnough; } ;// CONCATENATED MODULE: ./src/double-representation/lsb-exponent.ts /** * Returns the true exponent of the lsb that is set of the given number or * NaN if a === 0 or +-inf or NaN. * @param a An array of numbers to check */ function lsbExponent(a) { if (a === 0 || !Number.isFinite(a)) { return NaN; } const e = exponent(a); return getLowestSetBit(a) - 52 + e; } ;// CONCATENATED MODULE: ./src/double-expansion/e-calculate.ts // We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗ const e_calculate_mult = expansionProduct; const e_calculate_tp = two_product_twoProduct; const e_calculate_multByDouble = scaleExpansion; const e_calculate_ts = two_sum_twoSum; const e_calculate_addDouble = growExpansion; const e_calculate_add = fastExpansionSum; const e_calculate_compress = (/* unused pure expression or super */ null && (eCompress)); /** * Return the result of summing an array of terms, each term being an array of * floating point expansions to be multiplied together. * * * The result is exact in the form of a non-overlapping floating point * expansion. 
* * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf) * * @param terms An array of terms to be summed; A term consists of an * array of floating point expansions to be multiplied together. */ // The terms parameter were chosen to always be expansions in order to keep the // function monomorhic, but whether it's really worth it I am not sure. function eCalculate(terms) { let total = [0]; for (let i = 0; i < terms.length; i++) { const term = terms[i]; let product = term[0]; for (let j = 1; j < term.length; j++) { const multiplicant = term[j]; if (multiplicant.length == 1) { if (product.length === 1) { product = e_calculate_tp(product[0], multiplicant[0]); } else { product = e_calculate_multByDouble(product, multiplicant[0]); } } else if (product.length === 1) { product = e_calculate_multByDouble(multiplicant, product[0]); } else { product = e_calculate_mult(multiplicant, product); } } // add if (product.length === 1) { if (total.length === 1) { total = e_calculate_ts(total[0], product[0]); } else { total = e_calculate_addDouble(total, product[0]); } } else { if (total.length === 1) { total = e_calculate_addDouble(product, total[0]); } else { total = e_calculate_add(total, product); } } } //return compress(total); return total; } ;// CONCATENATED MODULE: ./src/double-expansion/e-product.ts // We *have* to do the below❗ The assignee is a getter❗ The assigned is a pure function❗ const e_product_mult = expansionProduct; const e_product_tp = two_product_twoProduct; const e_product_multByDouble = scaleExpansion; const e_product_compress = e_compress_eCompress; /** * Return the result of multiplying together an array of floating point * expansions. * * * The result is exact in the form of a non-overlapping floating point * expansion. 
* * * see [Shewchuk](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf) * * @param terms an array of multiplicands */ function eProduct(term) { let product = term[0]; for (let j = 1; j < term.length; j++) { const multiplicant = term[j]; if (multiplicant.length == 1) { if (product.length === 1) { product = e_product_tp(product[0], multiplicant[0]); } else { product = e_product_multByDouble(product, multiplicant[0]); } } else if (product.length ==