ieee-float
Version:
pure javascript IEEE 754 32-bit and 64-bit floating point binary conversion
415 lines (360 loc) • 17.7 kB
JavaScript
/**
* pure javascript functions to read and write 32-bit and 64-bit IEEE 754 floating-point
*
* Copyright (C) 2017 Andras Radics
* Licensed under the Apache License, Version 2.0
*/
;
// Set by the byte-order probe below; stays false if Float32Array is unavailable.
var isBigeCpu = false;
// Typed-array backed converters; they stay undefined when the typed arrays are unavailable.
var readFloat32Array, writeFloat32Array, readFloat32ArrayRev, writeFloat32ArrayRev;
var readFloat64Array, writeFloat64Array, readFloat64ArrayRev, writeFloat64ArrayRev;

// test FloatArray existence with && to not throw off code coverage
(typeof Float32Array === 'function') && (function(){
    // a single float32 cell and a byte view aliasing the same four bytes of storage
    var floatCell = new Float32Array(1);
    var cellBytes = new Uint8Array(floatCell.buffer);

    // Probe the native byte order: after storing -1 the byte at index 3
    // is zero only if the sign/exponent byte was stored first (big-endian).
    floatCell[0] = -1;
    isBigeCpu = cellBytes[3] === 0;

    /**
     * Read four bytes of buf starting at pos in native byte order.
     * Returns 0 if the four bytes would not fit inside buf.
     */
    readFloat32Array = function readFloat32Array( buf, pos ) {
        pos = pos || 0;
        if (pos < 0 || pos + 4 > buf.length) return 0;
        cellBytes[0] = buf[pos++];
        cellBytes[1] = buf[pos++];
        cellBytes[2] = buf[pos++];
        cellBytes[3] = buf[pos];
        return floatCell[0];
    };

    /**
     * Read four bytes of buf starting at pos in the reverse of native byte order.
     * Returns 0 if the four bytes would not fit inside buf.
     */
    readFloat32ArrayRev = function readFloat32ArrayRev( buf, pos ) {
        pos = pos || 0;
        if (pos < 0 || pos + 4 > buf.length) return 0;
        cellBytes[3] = buf[pos++];
        cellBytes[2] = buf[pos++];
        cellBytes[1] = buf[pos++];
        cellBytes[0] = buf[pos];
        return floatCell[0];
    };

    /**
     * Store v as a float32 into buf[pos..pos+3] in native byte order.
     * No bounds check is made on the destination.
     */
    writeFloat32Array = function writeFloat32Array( buf, v, pos ) {
        pos = pos || 0;
        floatCell[0] = v;
        buf[pos++] = cellBytes[0];
        buf[pos++] = cellBytes[1];
        buf[pos++] = cellBytes[2];
        buf[pos] = cellBytes[3];
    };

    /**
     * Store v as a float32 into buf[pos..pos+3] in the reverse of native byte order.
     * No bounds check is made on the destination.
     */
    writeFloat32ArrayRev = function writeFloat32ArrayRev( buf, v, pos ) {
        pos = pos || 0;
        floatCell[0] = v;
        buf[pos++] = cellBytes[3];
        buf[pos++] = cellBytes[2];
        buf[pos++] = cellBytes[1];
        buf[pos] = cellBytes[0];
    };
})();
(typeof Float64Array === 'function') && (function(){
    // a single float64 cell and a byte view aliasing the same eight bytes of storage
    var doubleCell = new Float64Array(1);
    var cellBytes = new Uint8Array(doubleCell.buffer);

    /**
     * Read eight bytes of buf starting at pos in native byte order.
     * Returns 0 if the eight bytes would not fit inside buf.
     */
    readFloat64Array = function readFloat64Array( buf, pos ) {
        pos = pos || 0;
        if (pos < 0 || pos + 8 > buf.length) return 0;
        cellBytes[0] = buf[pos + 0];
        cellBytes[1] = buf[pos + 1];
        cellBytes[2] = buf[pos + 2];
        cellBytes[3] = buf[pos + 3];
        cellBytes[4] = buf[pos + 4];
        cellBytes[5] = buf[pos + 5];
        cellBytes[6] = buf[pos + 6];
        cellBytes[7] = buf[pos + 7];
        return doubleCell[0];
    };

    /**
     * Read eight bytes of buf starting at pos in the reverse of native byte order.
     * Returns 0 if the eight bytes would not fit inside buf.
     */
    readFloat64ArrayRev = function readFloat64ArrayRev( buf, pos ) {
        pos = pos || 0;
        if (pos < 0 || pos + 8 > buf.length) return 0;
        cellBytes[7] = buf[pos + 0];
        cellBytes[6] = buf[pos + 1];
        cellBytes[5] = buf[pos + 2];
        cellBytes[4] = buf[pos + 3];
        cellBytes[3] = buf[pos + 4];
        cellBytes[2] = buf[pos + 5];
        cellBytes[1] = buf[pos + 6];
        cellBytes[0] = buf[pos + 7];
        return doubleCell[0];
    };

    /**
     * Store v as a float64 into buf[pos..pos+7] in native byte order.
     * No bounds check is made on the destination.
     */
    writeFloat64Array = function writeFloat64Array( buf, v, pos ) {
        pos = pos || 0;
        doubleCell[0] = v;
        buf[pos + 0] = cellBytes[0];
        buf[pos + 1] = cellBytes[1];
        buf[pos + 2] = cellBytes[2];
        buf[pos + 3] = cellBytes[3];
        buf[pos + 4] = cellBytes[4];
        buf[pos + 5] = cellBytes[5];
        buf[pos + 6] = cellBytes[6];
        buf[pos + 7] = cellBytes[7];
    };

    /**
     * Store v as a float64 into buf[pos..pos+7] in the reverse of native byte order.
     * No bounds check is made on the destination.
     */
    writeFloat64ArrayRev = function writeFloat64ArrayRev( buf, v, pos ) {
        pos = pos || 0;
        doubleCell[0] = v;
        buf[pos + 0] = cellBytes[7];
        buf[pos + 1] = cellBytes[6];
        buf[pos + 2] = cellBytes[5];
        buf[pos + 3] = cellBytes[4];
        buf[pos + 4] = cellBytes[3];
        buf[pos + 5] = cellBytes[2];
        buf[pos + 6] = cellBytes[1];
        buf[pos + 7] = cellBytes[0];
    };
})();
// arithmetic operations preserve NaN, but bitwise ops (|, >>, etc) convert them to zero
// Assemble the word to generate NaN if any reads are undefined (outside the bounds of the array).
/**
 * Combine the four bytes buf[offs..offs+3] into one unsigned 32-bit number.
 * The bytes are combined with multiply and add rather than shifts, so a byte
 * that reads as undefined (past the end of the array) makes the result NaN.
 *
 * @param {Array|Buffer} buf   the bytes to read from
 * @param {number} offs        index of the first byte
 * @param {string} dirn        'bige' if the first byte is the most significant,
 *                             anything else if it is the least significant
 * @returns {number} the assembled word, or NaN
 */
function readWord( buf, offs, dirn ) {
    var first = buf[offs++];
    var second = buf[offs++];
    var third = buf[offs++];
    var fourth = buf[offs];

    if (dirn === 'bige') {
        return ((first * 256 + second) * 256 + third) * 256 + fourth;
    }
    return ((fourth * 256 + third) * 256 + second) * 256 + first;
}
/**
 * Store the low 32 bits of v as four bytes into buf[offs..offs+3].
 *
 * @param {Array|Buffer} buf   destination for the bytes
 * @param {number} v           the word to store; only its 32-bit integer value is used
 * @param {number} offs        index of the first byte written
 * @param {string} dirn        'bige' to store the most significant byte first,
 *                             anything else to store the least significant byte first
 */
function writeWord( buf, v, offs, dirn ) {
    var byte3 = (v >>> 24) & 0xff;      // most significant
    var byte2 = (v >> 16) & 0xff;
    var byte1 = (v >> 8) & 0xff;
    var byte0 = (v) & 0xff;             // least significant

    if (dirn === 'bige') {
        buf[offs++] = byte3;
        buf[offs++] = byte2;
        buf[offs++] = byte1;
        buf[offs] = byte0;
    }
    else {
        buf[offs++] = byte0;
        buf[offs++] = byte1;
        buf[offs++] = byte2;
        buf[offs] = byte3;
    }
}
// write the two-word value [hi,lo] where hi holds the 32 msb bits and lo the 32 lsb bits
/**
 * Store the 64-bit quantity [hi,lo] as eight bytes into buf[offs..offs+7].
 * For 'bige' the high word occupies the first four bytes, otherwise the low word does;
 * each word is stored by writeWord with the same byte order.
 *
 * @param {Array|Buffer} buf   destination for the bytes
 * @param {number} hi          the 32 most significant bits
 * @param {number} lo          the 32 least significant bits
 * @param {number} offs        index of the first byte written
 * @param {string} dirn        'bige' for big-endian, anything else for little-endian
 */
function writeDoubleWord( buf, hi, lo, offs, dirn ) {
    var isBige = (dirn === 'bige');
    var firstWord = isBige ? hi : lo;
    var secondWord = isBige ? lo : hi;

    writeWord(buf, firstWord, offs, dirn);
    writeWord(buf, secondWord, offs + 4, dirn);
}
// given an exponent n, return 2**n
// n is always an integer, faster to shift when possible
// Note that nodejs Math.pow() is faster than a lookup table (may be caching)
// Lookup tables of the powers of two: _2eXp[i] holds 2^i and _2eXn[i] holds 2^-i, for 0 <= i < 1200.
// The tables deliberately run past the range of a double, so the tail entries
// hold Infinity (2^i for i > 1023) and 0 (2^-i for i > 1074).
var _2eXp = [];
var _2eXn = [];
for (var i=0; i<1200; i++) {
    _2eXp[i] = Math.pow(2, i);
    _2eXn[i] = Math.pow(2, -i);
}

/**
 * Return 2 raised to the power exp.
 * Integer exponents with magnitude below 1200 are answered from the lookup tables.
 * Any other exponent (larger magnitude, non-integer, NaN) misses the tables and
 * is computed with Math.pow instead of leaking the `undefined` table miss to the caller.
 *
 * @param {number} exp   the exponent, normally an integer
 * @returns {number} 2**exp
 */
function pow2( exp ) {
    var power = (exp >= 0) ? _2eXp[exp] : _2eXn[-exp];
    return (power === undefined) ? Math.pow(2, exp) : power;
}
// getFloat() from qbson, https://github.com/andrasq/node-qbson:
/*
* extract the 64-bit little-endian ieee 754 floating-point value
* see http://en.wikipedia.org/wiki/Double-precision_floating-point_format
* 1 bit sign + 11 bits exponent + (1 implicit mantissa 1 bit) + 52 mantissa bits
*/
var _rshift32 = (1 / 0x100000000);              // >> 32 for floats
var _rshift20 = (1 / 0x100000);                 // >> 20 for floats
var _lshift32 = (1 * 0x100000000);              // << 32
var _rshift52 = (1 * _rshift32 * _rshift20);    // >> 52
var _rshift1023 = pow2(-1023);                  // 2^-1023

/**
 * Decode the 64-bit IEEE 754 value stored in the eight bytes buf[offset..offset+7].
 * The bytes are fetched as two 32-bit words with readWord; a word that extends
 * past the end of buf reads as NaN, and such reads decode to zero.
 *
 * @param {Array|Buffer} buf   the bytes to decode
 * @param {number} offset      index of the first byte
 * @param {string} dirn        'bige' for big-endian storage, anything else for little-endian
 * @returns {number} the decoded value, or 0 if the bytes are not all inside buf
 */
function readDouble( buf, offset, dirn ) {
    var w0 = readWord(buf, offset, dirn);
    var w1 = readWord(buf, offset + 4, dirn);
    var highWord, lowWord;
    (dirn === 'bige') ? (highWord = w0, lowWord = w1) : (highWord = w1, lowWord = w0);

    // a NaN highWord makes the bitwise fields zero, a NaN lowWord makes the mantissa NaN
    var mantissa = (highWord & 0x000FFFFF) * _lshift32 + lowWord;
    var exponent = (highWord & 0x7FF00000) >>> 20;
    var sign = (highWord >> 31) || 1;           // -1, 1, or 1 if NaN

    var value;
    if (exponent === 0x000) {
        // zero if !mantissa, else subnormal (non-normalized reduced precision small value)
        // recover negative zero -0.0 as distinct from 0.0
        // subnormals do not have an implied leading 1 bit and are positioned 1 bit to the left
        // a NaN mantissa is falsy and also yields zero
        value = mantissa ? (mantissa * pow2(-52 + 1 -1023)) : 0.0;
    }
    else if (exponent < 0x7ff) {
        // normalized value with an implied leading 1 bit and 1023 biased exponent
        // test for NaN with (mantissa >= 0), and return 0 if NaN ie read from outside buffer bounds
        value = (mantissa >= 0) ? (1 + mantissa * _rshift52) * pow2(exponent - 1023) : 0.0;
    }
    else {
        // Infinity if zero mantissa (+/- per sign), NaN if nonzero mantissa.
        // A NaN mantissa means the low word was read from outside the buffer bounds:
        // return 0 like the other branches instead of mistaking it for a zero mantissa.
        value = mantissa ? NaN : ((mantissa === 0) ? Infinity : 0.0);
    }

    return sign * value;
}
//
// Note: node-v9 prefers +28% (sign * value), node v6 doesnt care, node v8 likes +16% (-value : value)
//
// float32: 1 sign + 8 exponent + 24 mantissa (23 stored, 1 implied)
// see https://en.wikipedia.org/wiki/Single-precision_floating-point_format
//
// Exponent     Mantissa == 0   Mantissa > 0    Value
// 00           +0, -0          denormalized    2^(  1-127) * (0. + (mantissa / 2^23))
// 01..FE       normalized      normalized      2^(exp-127) * (1. + (mantissa / 2^23))
// FF           +/-Infinity     NaN             -
//
var _rshift23 = Math.pow(2, -23);       // >> 23 for floats
var _rshift127 = Math.pow(2, -127);     // 2^-127

/**
 * Decode the 32-bit IEEE 754 value stored in the four bytes buf[offset..offset+3].
 * The bytes are fetched with readWord; a NaN word (a read past the end of buf)
 * has all-zero bit fields and decodes to 0.
 *
 * @param {Array|Buffer} buf   the bytes to decode
 * @param {number} offset      index of the first byte
 * @param {string} dirn        'bige' for big-endian storage, anything else for little-endian
 * @returns {number} the decoded value
 */
function readFloat( buf, offset, dirn ) {
    var bits = readWord(buf, offset, dirn);

    var fraction = (bits & 0x007FFFFF);
    var biasedExp = (bits & 0x7F800000) >>> 23;
    var signFactor = (bits >> 31) || 1;         // -1 if the sign bit is set, else 1

    var magnitude;
    if (biasedExp === 0xff) {
        // all-ones exponent: Infinity, or NaN if any fraction bit is set
        magnitude = fraction ? NaN : Infinity;
    }
    else if (biasedExp === 0x000) {
        // zero exponent: zero, or a denormalized value without the implied leading 1
        magnitude = fraction ? fraction * _rshift23 * 2 * _rshift127 : 0.0;
    }
    else {
        // normalized: implied leading 1 and an exponent biased by 127
        magnitude = (1 + fraction * _rshift23) * pow2(biasedExp - 127);
    }

    return signFactor * magnitude;
}
// given a positive value v, normalize it to between 1 and less than 2 with a binary exponent
// The exponent is the number of bit places it was shifted, positive if v was >= 2.
// The special values 0, -0, NaN, +Infinity and -Infinity are not handled here.
// Looping is faster than (Math.log(v) / Math.LN2) in node-v6, v8, and v9.
// This function can account for half the time taken to write a double.
// scratch result object, reused and overwritten by every call to normalize()
var _parts = { exp: 0, mant: 0 };

/**
 * Split the positive finite value v into a binary exponent and a mantissa
 * scaled into the range [1, 2), such that v = mant * 2^exp.
 *
 * @param {number} v   a positive, finite, non-zero number
 * @returns {{exp: number, mant: number}} the shared _parts object; copy the
 *          fields before calling normalize again
 */
function normalize( v ) {
    var shift = 0;

    if (v >= 2) {
        // scale down by the number of doublings it takes 1 to approach v
        shift = countDoublings(1, v);
        v *= pow2(-shift);
        // if doubled to exactly v/2, one more halving is needed
        if (v >= 2) {
            v /= 2;
            shift += 1;
        }
    }
    else if (v < 1) {
        // scale up by the number of doublings it takes v to approach 2
        var doublings = countDoublings(v, 2);
        if (doublings > 1023) {
            // 2^n overflows to Infinity for n > 1023, so scale up in two steps
            v *= pow2(doublings - 100);
            v *= pow2(100);
        }
        else {
            v *= pow2(doublings);
        }
        shift = -doublings;
    }

    // TODO: pass in num bits, and normalize straight to mantissa / denorm
    _parts.exp = shift;
    _parts.mant = v;
    return _parts;
}
// count how many doublings of a are needed for it to be close to b.
// Returns a shift count that grows (a) to at least (b/2) but less than (b).
// Doubling 1 toward v ensures that (v >> n) >= 1 < 2,
// and doubling from v toward 2 ensures that (v << n) >= 1 < 2.
var _2e192 = Math.pow(2, 192);

/**
 * Count how many times a can be doubled while staying below b.
 * The count is accumulated in strides of 192, 64, 16, 6 and finally 1 doublings,
 * each stride being taken for as long as the enlarged a is still less than b.
 *
 * @param {number} a   the starting value
 * @param {number} b   the limit to grow toward
 * @returns {number} the number of doublings applied
 */
function countDoublings( a, b ) {
    var doublings = 0;

    for ( ; a * _2e192 < b; a *= _2e192) doublings += 192;
    for ( ; a * 0x10000000000000000 < b; a *= 0x10000000000000000) doublings += 64;
    for ( ; a * 0x10000 < b; a *= 0x10000) doublings += 16;
    for ( ; a * 0x40 < b; a *= 0x40) doublings += 6;
    for ( ; a * 2 < b; a *= 2) doublings += 1;

    return doublings;
}
// round the fraction in v and scale up to scale = 2^n bits
// https://blog.angularindepth.com/how-to-round-binary-fractions-625c8fa3a1af
// Rounding can cause the scaled value to exceed 2^n.
/**
 * Scale v by scale and prepare it for round-to-nearest-even by truncation.
 * The result is not itself an integer: 0.5 is added so that a later truncation
 * to integer rounds to nearest, except that an exact 1/2 tie above an even
 * integer is left alone so it truncates down to the even neighbor.
 *
 * @param {number} v       the fraction to round
 * @param {number} scale   the multiplier, 2^n for n bits
 * @returns {number} the scaled value, biased by 0.5 unless it is an even tie
 */
function roundMantissa( v, scale ) {
    var scaled = v * scale;
    var isTie = (scaled - Math.floor(scaled)) === 0.5;

    // a tie with an even integer part stays put (0.5 to 0.0, 2.5 to 2.0)
    if (isTie && !(scaled & 1)) return scaled;

    // everything else is rounded to nearest, with odd ties going up (1.5 to 2.0)
    return scaled + 0.5;
}
// float32: 1 sign + 8 exponent + (1 implied mantissa 1 bit) + 23 stored mantissa bits
// NaN types: quiet Nan = x.ff.8xxx, signaling NaN = x.ff.0xx1 (msb zero, at least one other bit set)
// JavaScript built-in NaN is the non-signaling 7fc00000, but arithmetic can yield a negative NaN ffc00000.
/**
 * Encode v as a 32-bit IEEE 754 value and store it into buf[offset..offset+3].
 * Values too large for a float32 are stored as +/-Infinity, values too small
 * are stored as a denormalized number or as signed zero, and NaN is always
 * stored as the positive non-signaling 7FC00000.
 *
 * @param {Array|Buffer} buf   destination for the bytes
 * @param {number} v           the value to encode
 * @param {number} offset      index of the first byte written
 * @param {string} dirn        'bige' for big-endian storage, anything else for little-endian
 */
function writeFloat( buf, v, offset, dirn ) {
    var signBit = 0;
    if (v < 0) { signBit = 0x80000000; v = -v; }

    // zero, Infinity and NaN have fixed bit patterns
    if (! (v && v < Infinity)) {
        var special;
        if (v === 0) {
            // tell -0 from +0 by the sign of the reciprocal
            special = (1/v < 0) ? 0x80000000 : 0x00000000;
        }
        else if (v === Infinity) {
            special = signBit | 0x7F800000;
        }
        else {
            special = 0x7FC00000;
        }
        writeWord(buf, special, offset, dirn);
        return;
    }

    // separate exponent and mantissa, then bias the exponent
    var norm = normalize(v);
    norm.exp += 127;

    if (norm.exp > 0) {
        // normalized number: round the 23 bits that follow the hidden leading 1
        norm.mant = roundMantissa(norm.mant - 1, 0x800000);
        // if rounding overflowed into the hidden 1s place, hide it and adjust the exponent
        if (norm.mant >= 0x800000) { norm.mant -= 0x800000; norm.exp += 1; }
        // too large, overflow to Infinity
        if (norm.exp > 254) {
            norm.mant = 0;
            norm.exp = 255;
        }
    }
    else if (norm.exp > -25) {
        // denormalized number: shift the leading 1 into the stored bits and round
        norm.mant = roundMantissa(norm.mant, pow2(22 + norm.exp));
        norm.exp = 0;
        // rounding can carry out and re-normalize the number
        if (norm.mant >= 0x800000) { norm.mant -= 0x800000; norm.exp += 1; }
    }
    else {
        // too small, underflow to zero
        norm.mant = 0;
        norm.exp = 0;
    }

    // the bitwise OR also truncates the 0.5-biased mantissa to an integer
    writeWord(buf, signBit | (norm.exp << 23) | norm.mant, offset, dirn);
}
// double64: 1 bit sign + 11 bits exponent + (1 implied mantissa 1 bit) + 52 stored mantissa bits
// Writing doubles is simpler than floats, because the internal javascript 64-bit floats
// are identical to the stored representation, and thus will not overflow or underflow.
// 8-byte scratch areas.  Nothing in the code below uses them.
var doubleArray = [0, 0, 0, 0, 0, 0, 0, 0];
// Allocate without the deprecated `new Buffer(size)` constructor where possible,
// and do not fail to load where there is no Buffer at all (eg a browser).
var doubleBuf = (typeof Buffer === 'function')
    ? (typeof Buffer.alloc === 'function' ? Buffer.alloc(8) : new Buffer(8))
    : [0, 0, 0, 0, 0, 0, 0, 0];
var _2e52 = Math.pow(2, 52);

/**
 * Encode v as a 64-bit IEEE 754 value and store it into buf[offset..offset+7].
 * NaN is always stored as the positive non-signaling 7FF80000 00000000.
 *
 * @param {Array|Buffer} buf   destination for the bytes
 * @param {number} v           the value to encode
 * @param {number} offset      index of the first byte written
 * @param {string} dirn        'bige' for big-endian storage, anything else for little-endian
 */
function writeDouble( buf, v, offset, dirn ) {
    var norm, highWord, lowWord, sign = 0;

    if (v < 0) { sign = 0x80000000; v = -v; }

    if (! (v && v < Infinity)) {
        if (v === 0) {                  // -0, +0
            // tell -0 from +0 by the sign of the reciprocal
            highWord = (1/v < 0) ? 0x80000000 : 0;
            lowWord = 0;
        }
        else if (v === Infinity) {      // -Infinity, +Infinity
            highWord = (sign + 0x7FF00000);
            lowWord = 0;
        }
        else {                          // NaN - positive, non-signaling
            highWord = 0x7FF80000;
            lowWord = 0;
        }
        writeDoubleWord(buf, highWord, lowWord, offset, dirn);
    }
    else {
        norm = normalize(v);            // separate exponent and mantissa
        norm.exp += 1023;               // bias exponent
        if (norm.exp <= 0) {            // denormalized
            // JavaScript numbers can not hold values small enough to underflow
            // and no need to round, all bits will be written
            norm.mant *= pow2(51 + norm.exp);
            norm.exp = 0;
        }
        else {
            // no need to round, all bits will be written
            norm.mant = (norm.mant - 1) * _2e52;
        }
        // the mantissa is a 52-bit integer: its top 20 bits go into the high word
        // (the bitwise OR truncates the quotient) and its low 32 bits into the low word
        highWord = sign | (norm.exp << 20) | (norm.mant / 0x100000000);
        lowWord = norm.mant >>> 0;
        writeDoubleWord(buf, highWord, lowWord, offset, dirn);
    }
}
;(function install() {
var exports = typeof module === 'object' && module.exports || this;
exports.readWord = readWord;
exports.writeWord = writeWord;
exports.writeDoubleWord = writeDoubleWord;
exports.readFloat = readFloat;
exports.writeFloat = writeFloat;
exports.readDouble = readDouble;
exports.writeDouble = writeDouble;
// expose the implementation to the tests
exports._useFloatArray = function( yesno ) {
exports._usingFloatArray = yesno;
if (yesno) {
// software conversion is faster for float32 than Float32Array
// Only read via Float32Array if yesno == 'full'.
if (yesno == 'full') exports.readFloatLE = isBigeCpu ? readFloat32ArrayRev : readFloat32Array;
exports.writeFloatLE = isBigeCpu ? writeFloat32ArrayRev : writeFloat32Array;
if (yesno == 'full') exports.readFloatBE = isBigeCpu ? readFloat32Array : readFloat32ArrayRev;
exports.writeFloatBE = isBigeCpu ? writeFloat32Array : writeFloat32ArrayRev;
exports.readDoubleLE = isBigeCpu ? readFloat64ArrayRev : readFloat64Array;
exports.writeDoubleLE = isBigeCpu ? writeFloat64ArrayRev : writeFloat64Array;
exports.readDoubleBE = isBigeCpu ? readFloat64Array : readFloat64ArrayRev;
exports.writeDoubleBE = isBigeCpu ? writeFloat64Array : writeFloat64ArrayRev;
}
else {
exports._usingFloatArray = '';
exports.readFloatLE = function readFloatLE( buf, offset ) { return exports.readFloat(buf, offset || 0, 'le'); }
exports.writeFloatLE = function writeFloatLE( buf, v, offset ) { exports.writeFloat(buf, v, offset || 0, 'le'); };
exports.readFloatBE = function readFloatBE( buf, offset ) { return exports.readFloat(buf, offset || 0, 'bige'); }
exports.writeFloatBE = function writeFloatBE( buf, v, offset ) { exports.writeFloat(buf, v, offset || 0, 'bige'); }
exports.readDoubleLE = function readDoubleLE( buf, offset ) { return exports.readDouble(buf, offset || 0, 'le'); }
exports.writeDoubleLE = function writeDoubleLE( buf, v, offset ) { exports.writeDouble(buf, v, offset || 0, 'le'); }
exports.readDoubleBE = function readDoubleBE( buf, offset ) { return exports.readDouble(buf, offset || 0, 'bige'); }
exports.writeDoubleBE = function writeDoubleLE( buf, v, offset ) { exports.writeDouble(buf, v, offset || 0, 'bige'); }
}
}
// expose the cpu endianism to the tests
exports._getBigeCpu = function() { return isBigeCpu };
exports._setBigeCpu = function(yesno) { isBigeCpu = yesno };
// by default export the software conversion functions, then
// if available, convert by casting a FloatArray to a byte array
exports._useFloatArray(false);
exports._useFloatArray(readFloat32Array && readFloat64Array && 'fastest');
// accelerate access
install.prototype = exports;
}).call(this);