UNPKG

@technobuddha/library

Version:
65 lines (64 loc) 2.58 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.decodeUTF8 = void 0; var constants_1 = require("../constants"); /** * Decode a UTF8 encoded string into unicode * * @param input the utf encoded string * @returns the decoded strings (which is encoded as UTF-16 by javascript) */ function decodeUTF8(input) { var result = constants_1.empty; for (var i = 0; i < input.length; ++i) { var c0 = input.charCodeAt(i); var c1 = void 0; var c2 = void 0; var c3 = void 0; if (c0 > 0x007F) { if (c0 > 0x00BF && c0 < 0x00E0) { c1 = input.charCodeAt(++i); if (i >= input.length) throw new Error('Incomplete 2-byte sequence'); if ((c1 & 0xC0) !== 0x80) throw new Error('Incorrect 2 byte sequence'); c0 = (c0 & 0x001F) << 6 | (c1 & 0x003F); } else if (c0 >= 0x00E0 && c0 < 0x00F0) { c1 = input.charCodeAt(++i); c2 = input.charCodeAt(++i); if (i >= input.length) throw new Error('Incomplete 3-byte sequence'); if ((c1 & 0xC0) !== 0x80 || (c2 & 0xC0) !== 0x80) throw new Error('Incorrect 3 byte sequence'); c0 = (c0 & 0x000F) << 12 | (c1 & 0x003F) << 6 | (c2 & 0x003F); } else if (c0 >= 0x00F0 && c0 < 0x00F8) { c1 = input.charCodeAt(++i); c2 = input.charCodeAt(++i); c3 = input.charCodeAt(++i); if (i >= input.length) throw new Error('incomplete 4 byte sequence'); if ((c1 & 0xC0) !== 0x80 || (c2 & 0xC0) !== 0x80 || (c3 & 0xc0) !== 0x80) throw new Error('Incorrect 3 byte sequence'); c0 = (c0 & 0x000f) << 18 | (c1 & 0x003F) << 12 | (c2 & 0x003F) << 6 | (c3 & 0x003F); } else { throw new Error("unknown multibyte start 0x" + c0.toString(16) + " @" + i); } } if (c0 <= 0xFFFF) { result += String.fromCharCode(c0); } else if (c0 <= 0x0010FFFF) { c0 -= 0x00010000; result += String.fromCharCode(c0 >> 10 | 0xD800) + String.fromCharCode(c0 & 0x03FF | 0xDC00); } else { throw new Error("code point 0x" + c0.toString(16) + " exceeds UTF-16 reach"); } } return result; } exports.decodeUTF8 = decodeUTF8; exports.default = decodeUTF8;