UNPKG

@technobuddha/library

Version:
59 lines (49 loc) 2.32 kB
import { empty } from '../constants';

/**
 * Report whether a code unit is a UTF-8 continuation byte (binary `10xxxxxx`).
 *
 * A full range check is used rather than a `0xC0` mask so that code units above
 * 0xFF (which cannot be bytes) and the `NaN` returned by an out-of-range
 * `charCodeAt` are never mistaken for continuation bytes.
 */
function isContinuationByte(unit: number): boolean {
  return unit >= 0x80 && unit <= 0xbf;
}

/**
 * Decode a UTF-8 encoded string into unicode
 *
 * Each UTF-16 code unit of `input` is treated as one byte of UTF-8 data.
 * Sequences of one to four bytes are supported. Code points above U+FFFF are
 * emitted as a surrogate pair.
 *
 * NOTE: overlong encodings (e.g. lead bytes 0xC0/0xC1) and encoded surrogate
 * code points are decoded as-is rather than rejected.
 *
 * @param input - the utf encoded string
 * @returns the decoded string (which is encoded as UTF-16 by javascript)
 * @throws Error when a multi-byte sequence is truncated, when a continuation
 * byte is not in the range 0x80-0xBF, when a lead byte is not a valid start of
 * a 1-4 byte sequence, or when the decoded code point is above U+10FFFF.
 */
export function decodeUTF8(input: string): string {
  let result = empty;

  for (let i = 0; i < input.length; ++i) {
    let codePoint = input.charCodeAt(i);

    if (codePoint > 0x007f) {
      if (codePoint > 0x00bf && codePoint < 0x00e0) {
        // 110xxxxx 10xxxxxx
        const c1 = input.charCodeAt(++i);

        if (i >= input.length) throw new Error('Incomplete 2-byte sequence');
        if (!isContinuationByte(c1)) throw new Error('Incorrect 2 byte sequence');

        codePoint = ((codePoint & 0x001f) << 6) | (c1 & 0x003f);
      } else if (codePoint >= 0x00e0 && codePoint < 0x00f0) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        const c1 = input.charCodeAt(++i);
        const c2 = input.charCodeAt(++i);

        // `i` now indexes the last byte of the sequence; if it is past the end
        // of the input, the sequence was cut short.
        if (i >= input.length) throw new Error('Incomplete 3-byte sequence');
        if (!isContinuationByte(c1) || !isContinuationByte(c2)) {
          throw new Error('Incorrect 3 byte sequence');
        }

        codePoint = ((codePoint & 0x000f) << 12) | ((c1 & 0x003f) << 6) | (c2 & 0x003f);
      } else if (codePoint >= 0x00f0 && codePoint < 0x00f8) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        const c1 = input.charCodeAt(++i);
        const c2 = input.charCodeAt(++i);
        const c3 = input.charCodeAt(++i);

        if (i >= input.length) throw new Error('incomplete 4 byte sequence');
        if (!isContinuationByte(c1) || !isContinuationByte(c2) || !isContinuationByte(c3)) {
          throw new Error('Incorrect 4 byte sequence');
        }

        // A 4-byte lead carries only three payload bits.
        codePoint =
          ((codePoint & 0x0007) << 18) |
          ((c1 & 0x003f) << 12) |
          ((c2 & 0x003f) << 6) |
          (c3 & 0x003f);
      } else {
        // Either a stray continuation byte (0x80-0xBF) or a value >= 0xF8,
        // which includes any code unit that is not a byte at all.
        throw new Error(`unknown multibyte start 0x${codePoint.toString(16)} @${i}`);
      }
    }

    if (codePoint <= 0xffff) {
      result += String.fromCharCode(codePoint);
    } else if (codePoint <= 0x0010ffff) {
      // Split a supplementary-plane code point into a UTF-16 surrogate pair.
      const offset = codePoint - 0x00010000;
      result +=
        String.fromCharCode((offset >> 10) | 0xd800) +
        String.fromCharCode((offset & 0x03ff) | 0xdc00);
    } else {
      throw new Error(`code point 0x${codePoint.toString(16)} exceeds UTF-16 reach`);
    }
  }

  return result;
}

export default decodeUTF8;