UNPKG

ieee-float

Version:

pure javascript IEEE 754 32-bit and 64-bit floating point binary conversion

415 lines (360 loc) 17.7 kB
/** * pure javascript functions to read and write 32-bit and 64-bit IEEE 754 floating-point * * Copyright (C) 2017 Andras Radics * Licensed under the Apache License, Version 2.0 */ 'use strict'; var isBigeCpu = false; var readFloat32Array, writeFloat32Array, readFloat32ArrayRev, writeFloat32ArrayRev; var readFloat64Array, writeFloat64Array, readFloat64ArrayRev, writeFloat64ArrayRev; // test FloatArray existence with && to not throw off code coverage (typeof Float32Array === 'function') && (function(){ var _fp32 = new Float32Array(1); var _b32 = new Uint8Array(_fp32.buffer); _fp32[0] = -1; isBigeCpu = _b32[3] === 0; readFloat32Array = function readFloat32Array( buf, pos ) { pos = pos || 0; if (pos < 0 || pos + 4 > buf.length) return 0; _b32[0] = buf[pos++]; _b32[1] = buf[pos++]; _b32[2] = buf[pos++];_b32[3] = buf[pos]; //_b32[0] = buf[pos+0]; _b32[1] = buf[pos+1]; _b32[2] = buf[pos+2]; _b32[3] = buf[pos+3]; return _fp32[0]; } readFloat32ArrayRev = function readFloat32ArrayRev( buf, pos ) { pos = pos || 0; if (pos < 0 || pos + 4 > buf.length) return 0; _b32[3] = buf[pos++]; _b32[2] = buf[pos++]; _b32[1] = buf[pos++]; _b32[0] = buf[pos]; //_b32[3] = buf[pos+0]; _b32[2] = buf[pos+1]; _b32[1] = buf[pos+2]; _b32[0] = buf[pos+3]; return _fp32[0]; } writeFloat32Array = function writeFloat32Array( buf, v, pos ) { pos = pos || 0; _fp32[0] = v; buf[pos++] = _b32[0]; buf[pos++] = _b32[1]; buf[pos++] = _b32[2]; buf[pos] = _b32[3]; //buf[pos+0] = _b32[0]; buf[pos+1] = _b32[1]; buf[pos+2] = _b32[2]; buf[pos+3] = _b32[3]; } writeFloat32ArrayRev = function writeFloat32ArrayRev( buf, v, pos ) { pos = pos || 0; _fp32[0] = v; buf[pos++] = _b32[3]; buf[pos++] = _b32[2]; buf[pos++] = _b32[1]; buf[pos] = _b32[0]; //buf[pos+0] = _b32[3]; buf[pos+1] = _b32[2]; buf[pos+2] = _b32[1]; buf[pos+3] = _b32[0]; } })(); (typeof Float64Array === 'function') && (function(){ var _fp64 = new Float64Array(1); var _b64 = new Uint8Array(_fp64.buffer); readFloat64Array = function readFloat64Array( buf, pos ) { pos = pos || 0; if (pos < 0 || pos + 8 > buf.length) return 0; //_b64[0] = buf[pos++]; _b64[1] = buf[pos++]; _b64[2] = buf[pos++]; _b64[3] = buf[pos++]; //_b64[4] = buf[pos++]; _b64[5] = buf[pos++]; _b64[6] = buf[pos++]; _b64[7] = buf[pos]; _b64[0] = buf[pos+0]; _b64[1] = buf[pos+1]; _b64[2] = buf[pos+2]; _b64[3] = buf[pos+3]; _b64[4] = buf[pos+4]; _b64[5] = buf[pos+5]; _b64[6] = buf[pos+6]; _b64[7] = buf[pos+7]; return _fp64[0]; } readFloat64ArrayRev = function readFloat64ArrayRev( buf, pos ) { pos = pos || 0; if (pos < 0 || pos + 8 > buf.length) return 0; //_b64[7] = buf[pos++]; _b64[6] = buf[pos++]; _b64[5] = buf[pos++]; _b64[4] = buf[pos++]; //_b64[3] = buf[pos++]; _b64[2] = buf[pos++]; _b64[1] = buf[pos++]; _b64[0] = buf[pos]; _b64[7] = buf[pos+0]; _b64[6] = buf[pos+1]; _b64[5] = buf[pos+2]; _b64[4] = buf[pos+3]; _b64[3] = buf[pos+4]; _b64[2] = buf[pos+5]; _b64[1] = buf[pos+6]; _b64[0] = buf[pos+7]; return _fp64[0]; } writeFloat64Array = function writeFloat64Array( buf, v, pos ) { pos = pos || 0; _fp64[0] = v; buf[pos + 0] = _b64[0]; buf[pos + 1] = _b64[1]; buf[pos + 2] = _b64[2]; buf[pos + 3] = _b64[3]; buf[pos + 4] = _b64[4]; buf[pos + 5] = _b64[5]; buf[pos + 6] = _b64[6]; buf[pos + 7] = _b64[7]; } writeFloat64ArrayRev = function writeFloat64ArrayRev( buf, v, pos ) { pos = pos || 0; _fp64[0] = v; buf[pos + 0] = _b64[7]; buf[pos + 1] = _b64[6]; buf[pos + 2] = _b64[5]; buf[pos + 3] = _b64[4]; buf[pos + 4] = _b64[3]; buf[pos + 5] = _b64[2]; buf[pos + 6] = _b64[1]; buf[pos + 7] = _b64[0]; } })(); // arithmetic operations preserve NaN, but logical ops (, >>, etc) convert them to zero // Assemble the word to generate NaN if any reads are undefined (outside the bounds of the array). function readWord( buf, offs, dirn ) { var a = buf[offs++], b = buf[offs++], c = buf[offs++], d = buf[offs]; return (dirn === 'bige') ? (((((a * 256) + b) * 256) + c) * 256) + d : (((((d * 256) + c) * 256) + b) * 256) + a; } function writeWord( buf, v, offs, dirn ) { var a = (v >>> 24) & 0xff, b = (v >> 16) & 0xff, c = (v >> 8) & 0xff, d = (v) & 0xff; (dirn === 'bige') ? (buf[offs++] = a, buf[offs++] = b, buf[offs++] = c, buf[offs] = d) : (buf[offs++] = d, buf[offs++] = c, buf[offs++] = b, buf[offs] = a) } // write the two-word value [hi,lo] where hi holds the 32 msb bits and lo the 32 lsb bits function writeDoubleWord( buf, hi, lo, offs, dirn ) { if (dirn === 'bige') { writeWord(buf, hi, offs, dirn); writeWord(buf, lo, offs + 4, dirn); } else { writeWord(buf, lo, offs, dirn); writeWord(buf, hi, offs + 4, dirn); } } // given an exponent n, return 2**n // n is always an integer, faster to shift when possible // Note that nodejs Math.pow() is faster than a lookup table (may be caching) var _2eXp = new Array(); for (var i=0; i<1200; i++) _2eXp[i] = Math.pow(2, i); var _2eXn = new Array(); for (var i=0; i<1200; i++) _2eXn[i] = Math.pow(2, -i); function pow2( exp ) { return (exp >= 0) ? _2eXp[exp] : _2eXn[-exp]; //return (exp >= 0) ? (exp < 31 ? (1 << exp) : Math.pow(2, exp)) // : (exp > -31 ? (1 / (1 << -exp)) : Math.pow(2, exp)); } // getFloat() from qbson, https://github.com/andrasq/node-qbson: /* * extract the 64-bit little-endian ieee 754 floating-point value * see http://en.wikipedia.org/wiki/Double-precision_floating-point_format * 1 bit sign + 11 bits exponent + (1 implicit mantissa 1 bit) + 52 mantissa bits */ var _rshift32 = (1 / 0x100000000); // >> 32 for floats var _rshift20 = (1 / 0x100000); // >> 20 for floats var _lshift32 = (1 * 0x100000000); // << 32 var _rshift52 = (1 * _rshift32 * _rshift20); // >> 52 var _rshift1023 = pow2(-1023); // 2^-1023 function readDouble( buf, offset, dirn ) { var w0 = readWord(buf, offset, dirn); var w1 = readWord(buf, offset + 4, dirn); var highWord, lowWord; (dirn === 'bige') ? (highWord = w0, lowWord = w1) : (highWord = w1, lowWord = w0); var mantissa = (highWord & 0x000FFFFF) * _lshift32 + lowWord; var exponent = (highWord & 0x7FF00000) >>> 20; var sign = (highWord >> 31) || 1; // -1, 1, or 1 if NaN var value; if (exponent === 0x000) { // zero if !mantissa, else subnormal (non-normalized reduced precision small value) // recover negative zero -0.0 as distinct from 0.0 // subnormals do not have an implied leading 1 bit and are positioned 1 bit to the left value = mantissa ? (mantissa * pow2(-52 + 1 -1023)) : 0.0; } else if (exponent < 0x7ff) { // normalized value with an implied leading 1 bit and 1023 biased exponent // test for NaN with (mantissa >= 0), and return 0 if NaN ie read from outside buffer bounds value = (mantissa >= 0) ? (1 + mantissa * _rshift52) * pow2(exponent - 1023) : 0.0; } else { // Infinity if zero mantissa (+/- per sign), NaN if nonzero mantissa value = mantissa ? NaN : Infinity; } return sign * value; } // // Note: node-v9 prefers +28% (sign * value), node v6 doesnt care, node v8 likes +16% (-value : value) // // float32: 1 sign + 8 exponent + 24 mantissa (23 stored, 1 implied) // see https://en.wikipedia.org/wiki/Single-precision_floating-point_format // // Exponent Mantissa == 0 Mantissa > 0 Value // 00 +0, -0 denormalized 2^( 1-127) * (0. + (mantissa / 2^23)) // 00.. FE normalized 2^(exp-127) * (1. + (mantissa / 2^23)) // FF +/-Infinity NaN - // var _rshift23 = Math.pow(2, -23); // >> 23 for floats var _rshift127 = Math.pow(2, -127); // 2^-127 function readFloat( buf, offset, dirn ) { var word = readWord(buf, offset, dirn); var mantissa = (word & 0x007FFFFF); var exponent = (word & 0x7F800000) >>> 23; var sign = (word >> 31) || 1; // -1, 1, or 1 if NaN var value; if (exponent === 0x000) { value = mantissa ? mantissa * _rshift23 * 2 * _rshift127 : 0.0; } else if (exponent < 0xff) { value = (1 + mantissa * _rshift23) * pow2(exponent - 127) // * _rshift127; } else { value = mantissa ? NaN : Infinity; } return sign * value; //return (word >>> 31) ? -value : value; } // given a positive value v, normalize it to between 1 and less than 2 with a binary exponent // The exponent is the number of bit places it was shifted, positive if v was >= 2. // The special values 0, -0, NaN, +Infinity and -Infinity are not handled here. // Looping is faster than (Math.log(v) / Math.LN2) in node-v6, v8, and v9. // This function can account for half the time taken to write a double. var _parts = { exp: 0, mant: 0 }; function normalize( v ) { var exp = 0; if (v >= 2) { exp = countDoublings(1, v); v *= pow2(-exp); // if doubled to exactly v/2, adjust up to v if (v >= 2) { v /= 2; exp += 1 } } else if (v < 1) { exp = countDoublings(v, 2); // avoid using pow2 exponents > 1023, they overflow to Infinity if (exp <= 1023) v *= pow2(exp); else { v *= pow2(exp - 100); v *= pow2(100); } exp = -exp; } // TODO: pass in num bits, and normalize straight to mantissa / denorm _parts.exp = exp; _parts.mant = v; return _parts; } // count how many doublings of a are needed for it be close to b. // Returns a shift count that grows (a) to at least (b/2) but less than (b). // Doubling 1 toward v ensures that (v >> n) >= 1 < 2, // and doubling from v toward 2 ensures that (v << n) >= 1 < 2. var _2e192 = Math.pow(2, 192); function countDoublings( a, b ) { var n = 0; while (a * _2e192 < b) { a *= _2e192; n += 192 } while (a * 0x10000000000000000 < b) { a *= 0x10000000000000000; n += 64 } while (a * 0x10000 < b) { a *= 0x10000; n += 16 } while (a * 0x40 < b) { a *= 0x40; n += 6 } while (a * 2 < b) { a *= 2; n += 1 } return n; } // round the fraction in v and scale up to scale = 2^n bits // https://blog.angularindepth.com/how-to-round-binary-fractions-625c8fa3a1af // Rounding can cause the scaled value to exceed 2^n. function roundMantissa( v, scale ) { v *= scale; // round to nearest, but round a 0.5 tie to even (0.5 to 0.0 and 1.5 to 2.0) // round all numbers with a fraction other than 1/2, and round up odd numbers with return ((v - Math.floor(v) !== 0.5) || (v & 1)) ? v + 0.5 : v; } // float32: 1 sign + 8 exponent + (1 implied mantissa 1 bit) + 23 stored mantissa bits // NaN types: quiet Nan = x.ff.8xxx, signaling NaN = x.ff.0xx1 (msb zero, at least one other bit set) // JavaScript built-in NaN is the non-signaling 7fc00000, but arithmetic can yield a negative NaN ffc00000. function writeFloat( buf, v, offset, dirn ) { var norm, word, sign = 0; if (v < 0) { sign = 0x80000000; v = -v; } if (! (v && v < Infinity)) { if (v === 0) { // -0, +0 word = (1/v < 0) ? 0x80000000 : 0x00000000; } else if (v === Infinity) { // -Infinity, +Infinity word = sign | 0x7F800000; } else { // NaN - positive, non-signaling word = 0x7FC00000; } writeWord(buf, word, offset, dirn); } else { norm = normalize(v); // separate exponent and mantissa norm.exp += 127; // bias exponent if (norm.exp <= 0) { // denormalized number if (norm.exp <= -25) { // too small, underflow to zero. -24 might round up though. norm.mant = 0; norm.exp = 0; } else { // denormalize norm.mant = roundMantissa(norm.mant, pow2(22 + norm.exp)); norm.exp = 0; // rounding can carry out and re-normalize the number if (norm.mant >= 0x800000) { norm.mant -= 0x800000; norm.exp += 1 } } } else { norm.mant = roundMantissa(norm.mant - 1, 0x800000); // if rounding overflowed into the hidden 1s place, hide it and adjust the exponent if (norm.mant >= 0x800000) { norm.mant -= 0x800000; norm.exp += 1 } if (norm.exp > 254) { // overflow to Infinity norm.mant = 0; norm.exp = 255; } } word = sign | (norm.exp << 23) | norm.mant; writeWord(buf, word, offset, dirn); } } // double64: 1 bit sign + 11 bits exponent + (1 implied mantissa 1 bit) + 52 stored mantissa bits // Writing doubles is simpler than floats, because the internal javascript 64-bit floats // are identical to the stored representation, and thus will not overflow or underflow. var doubleArray = [0, 0, 0, 0, 0, 0, 0, 0]; var doubleBuf = new Buffer(8); var _2e52 = Math.pow(2, 52); function writeDouble( buf, v, offset, dirn ) { var norm, highWord, lowWord, sign = 0; if (v < 0) { sign = 0x80000000; v = -v; } if (! (v && v < Infinity)) { if (v === 0) { // -0, +0 highWord = (1/v < 0) ? 0x80000000 : 0; lowWord = 0; } else if (v === Infinity) { // -Infinity, +Infinity highWord = (sign + 0x7FF00000); lowWord = 0; } else { // NaN - positive, non-signaling highWord = 0x7FF80000; lowWord = 0; } writeDoubleWord(buf, highWord, lowWord, offset, dirn); } else { norm = normalize(v); // separate exponent and mantissa norm.exp += 1023; // bias exponent if (norm.exp <= 0) { // denormalized // JavaScript numbers can not hold values small enough to underflow // and no need to round, all bits will be written norm.mant *= pow2(51 + norm.exp); norm.exp = 0; } else { // no need to round, all bits will be written norm.mant = (norm.mant - 1) * _2e52; } highWord = sign | (norm.exp << 20) | (norm.mant / 0x100000000); lowWord = norm.mant >>> 0; writeDoubleWord(buf, highWord, lowWord, offset, dirn); } } ;(function install() { var exports = typeof module === 'object' && module.exports || this; exports.readWord = readWord; exports.writeWord = writeWord; exports.writeDoubleWord = writeDoubleWord; exports.readFloat = readFloat; exports.writeFloat = writeFloat; exports.readDouble = readDouble; exports.writeDouble = writeDouble; // expose the implementation to the tests exports._useFloatArray = function( yesno ) { exports._usingFloatArray = yesno; if (yesno) { // software conversion is faster for float32 than Float32Array // Only read via Float32Array if yesno == 'full'. if (yesno == 'full') exports.readFloatLE = isBigeCpu ? readFloat32ArrayRev : readFloat32Array; exports.writeFloatLE = isBigeCpu ? writeFloat32ArrayRev : writeFloat32Array; if (yesno == 'full') exports.readFloatBE = isBigeCpu ? readFloat32Array : readFloat32ArrayRev; exports.writeFloatBE = isBigeCpu ? writeFloat32Array : writeFloat32ArrayRev; exports.readDoubleLE = isBigeCpu ? readFloat64ArrayRev : readFloat64Array; exports.writeDoubleLE = isBigeCpu ? writeFloat64ArrayRev : writeFloat64Array; exports.readDoubleBE = isBigeCpu ? readFloat64Array : readFloat64ArrayRev; exports.writeDoubleBE = isBigeCpu ? writeFloat64Array : writeFloat64ArrayRev; } else { exports._usingFloatArray = ''; exports.readFloatLE = function readFloatLE( buf, offset ) { return exports.readFloat(buf, offset || 0, 'le'); } exports.writeFloatLE = function writeFloatLE( buf, v, offset ) { exports.writeFloat(buf, v, offset || 0, 'le'); }; exports.readFloatBE = function readFloatBE( buf, offset ) { return exports.readFloat(buf, offset || 0, 'bige'); } exports.writeFloatBE = function writeFloatBE( buf, v, offset ) { exports.writeFloat(buf, v, offset || 0, 'bige'); } exports.readDoubleLE = function readDoubleLE( buf, offset ) { return exports.readDouble(buf, offset || 0, 'le'); } exports.writeDoubleLE = function writeDoubleLE( buf, v, offset ) { exports.writeDouble(buf, v, offset || 0, 'le'); } exports.readDoubleBE = function readDoubleBE( buf, offset ) { return exports.readDouble(buf, offset || 0, 'bige'); } exports.writeDoubleBE = function writeDoubleLE( buf, v, offset ) { exports.writeDouble(buf, v, offset || 0, 'bige'); } } } // expose the cpu endianism to the tests exports._getBigeCpu = function() { return isBigeCpu }; exports._setBigeCpu = function(yesno) { isBigeCpu = yesno }; // by default export the software conversion functions, then // if available, convert by casting a FloatArray to a byte array exports._useFloatArray(false); exports._useFloatArray(readFloat32Array && readFloat64Array && 'fastest'); // accelerate access install.prototype = exports; }).call(this);