UNPKG

fp16

Version:

Half-precision 16-bit floating point numbers

56 lines (55 loc) 2.11 kB
import { float16Emin, float16Emax, float32Emax, float32Precision, float32View, } from "./utils.js";

/**
 * Encode `value` as a half-precision (16-bit) float and store it in `view`
 * at byte `offset`.
 *
 * The value is first narrowed to a 32-bit float through the shared
 * `float32View` scratch buffer, then its sign / exponent / mantissa fields are
 * repacked into the 1-5-10 bit half-precision layout. Results that are not
 * exactly representable are rounded to nearest, ties to even.
 *
 * @param {DataView} view - Target view; only `setUint16` is called on it.
 * @param {number} offset - Byte offset in `view` at which the 2 bytes are written.
 * @param {number} value - Number to encode.
 * @param {boolean} [littleEndian] - Passed through to `view.setUint16`.
 * @returns {*} Whatever `view.setUint16` returns (`undefined` for a standard DataView).
 */
export function setFloat16(view, offset, value, littleEndian) {
  // Special values are written directly as fixed bit patterns.
  // NOTE: the global (coercing) isNaN is kept on purpose so that inputs which
  // coerce to NaN (e.g. undefined) are stored as NaN rather than falling
  // through to the overflow branch below.
  if (isNaN(value)) {
    return view.setUint16(offset, 0x7e00, littleEndian);
  } else if (value === Infinity) {
    return view.setUint16(offset, 0x7c00, littleEndian);
  } else if (value === -Infinity) {
    return view.setUint16(offset, 0xfc00, littleEndian);
  } else if (Object.is(value, 0)) {
    return view.setUint16(offset, 0, littleEndian);
  } else if (Object.is(value, -0)) {
    return view.setUint16(offset, 0x8000, littleEndian);
  }

  // Round-trip through the scratch view to obtain the float32 bit pattern.
  // The same endianness is used for the write and the read, so the integer
  // read back is the logical bit pattern either way.
  float32View.setFloat32(0, value, littleEndian);
  const i32 = float32View.getInt32(0, littleEndian);

  const sign = i32 & (1 << 31); // bit 31; `sign >>> 16` moves it to bit 15
  const exponent = i32 & 0x7f800000; // biased 8-bit exponent field, still in place
  const mantissa = i32 & 0x007fffff; // 23-bit fraction field

  // Unbiased exponent, then re-biased for half precision.
  // Presumably float32Precision is 23 and float32Emax is 127 (defined in
  // ./utils.js, not visible here).
  const exponentValue = (exponent >>> float32Precision) - float32Emax;
  const e = exponentValue + float16Emax;

  if (float16Emax < exponentValue) {
    // overflow to +/- Infinity
    return view.setUint16(offset, (sign >>> 16) | 0x7c00, littleEndian);
  }

  if (exponentValue < float16Emin) {
    if (14 - e > 24) {
      // cannot be rounded to a subnormal number; underflow to +/- 0
      // (fixed: this previously wrote to byte offset 0 instead of `offset`)
      return view.setUint16(offset, sign >>> 16, littleEndian);
    } else {
      // can be rounded to a subnormal number
      const c = mantissa | 0x00800000; // restore the implicit leading 1
      const m = c >>> (14 - e); // subnormal mantissa, truncated
      const r = 1 << (13 - e); // highest discarded bit (the "round" bit)
      // Round up when the round bit is set and either a lower discarded bit
      // or the lowest kept bit is set: (3 * r - 1) masks exactly those bits.
      if (c & r && c & (3 * r - 1)) {
        return view.setUint16(offset, (sign >>> 16) | (m + 1), littleEndian);
      } else {
        return view.setUint16(offset, (sign >>> 16) | m, littleEndian);
      }
    }
  }

  // Normal number: keep the top 10 mantissa bits and round on bit 12.
  const r = 0x00001000;
  const i16 = (sign >>> 16) | (e << 10) | (mantissa >> 13);
  if (mantissa & r && mantissa & (3 * r - 1)) {
    // A carry out of the mantissa correctly bumps the exponent field
    // (and yields Infinity from the largest finite exponent).
    return view.setUint16(offset, i16 + 1, littleEndian);
  } else {
    return view.setUint16(offset, i16, littleEndian);
  }
}