@cloudpss/ubjson

// Byte-length algorithm adapted from:
// https://github.com/ehmicky/string-byte-length/blob/main/src/char_code.js

// Exclusive upper bound of the 1-byte (ASCII) range: code units below this take 1 byte.
const LAST_ASCII_CODEPOINT = 0x80;
// Exclusive upper bound of the 2-byte range: code units below this take at most 2 bytes.
const LAST_TWO_BYTES_CODEPOINT = 0x800;
// All other code units take 3 bytes.
// However, U+d800 to U+dbff:
//  - Followed by U+dc00 to U+dfff -> 4 bytes together (astral character)
//  - Otherwise -> 3 bytes (like above)
const FIRST_HIGH_SURROGATE = 0xd800;
const LAST_HIGH_SURROGATE = 0xdbff;
const FIRST_LOW_SURROGATE = 0xdc00;
const LAST_LOW_SURROGATE = 0xdfff;

/**
 * Compute the number of bytes `string` occupies once encoded as UTF-8,
 * using only `charCodeAt` (no platform encoder required).
 *
 * Unpaired surrogates are counted as 3 bytes each.
 *
 * @param string - The string to measure.
 * @returns The UTF-8 byte length.
 */
export function jsStringByteLength(string: string): number {
    const charLength = string.length;
    // Start from one byte per UTF-16 code unit and add the extra bytes as they are found.
    let byteLength = charLength;

    for (let charIndex = 0; charIndex < charLength; charIndex += 1) {
        const codepoint = string.charCodeAt(charIndex);

        if (codepoint < LAST_ASCII_CODEPOINT) {
            continue;
        }

        if (codepoint < LAST_TWO_BYTES_CODEPOINT) {
            byteLength += 1;
            continue;
        }

        byteLength += 2;

        if (codepoint < FIRST_HIGH_SURROGATE || codepoint > LAST_HIGH_SURROGATE) {
            continue;
        }

        // When out of bounds this is NaN, so both comparisons below are `false`
        // and the index is simply advanced past the end of the string.
        const nextCodepoint = string.charCodeAt(charIndex + 1);

        // High surrogates should be followed by low surrogates.
        // However, JavaScript strings allow invalid surrogates, which are counted
        // as a normal 3-byte character. This should not happen often in real code
        // though.
        if (nextCodepoint < FIRST_LOW_SURROGATE || nextCodepoint > LAST_LOW_SURROGATE) {
            continue;
        }

        // Valid pair: 2 code units -> 4 bytes (2 + 2 extra); skip the low surrogate.
        charIndex += 1;
    }

    return byteLength;
}

/**
 * Compute the number of bytes `string` occupies once encoded as UTF-8,
 * by delegating to `Buffer.byteLength`.
 *
 * @param string - The string to measure.
 * @returns The UTF-8 byte length.
 */
export function nodeStringByteLength(string: string): number {
    return Buffer.byteLength(string, 'utf8');
}

/**
 * Compute the UTF-8 byte length of a string.
 * Bound to an implementation by {@link resetEnv}.
 */
export let stringByteLength: (v: string) => number;

/**
 * Encode string into utf-8 using only `charCodeAt`.
 * Provided `buf` MUST have enough space.
 *
 * Unpaired surrogates are written as U+FFFD (`EF BF BD`).
 *
 * @param v - The string to encode.
 * @param buf - Destination buffer.
 * @param offset - Index in `buf` at which writing starts.
 * @returns The number of bytes written.
 */
export function jsEncodeInto(v: string, buf: Uint8Array, offset: number): number {
    let ptr = offset;
    const strLen = v.length;
    for (let i = 0; i < strLen; i++) {
        const c1 = v.charCodeAt(i);
        if (c1 < LAST_ASCII_CODEPOINT) {
            buf[ptr++] = c1;
        } else if (c1 < LAST_TWO_BYTES_CODEPOINT) {
            buf[ptr++] = 0xc0 | (c1 >> 6);
            buf[ptr++] = 0x80 | (c1 & 0x3f);
        } else if (c1 < FIRST_HIGH_SURROGATE || c1 > LAST_LOW_SURROGATE) {
            buf[ptr++] = 0xe0 | (c1 >> 12);
            buf[ptr++] = 0x80 | ((c1 >> 6) & 0x3f);
            buf[ptr++] = 0x80 | (c1 & 0x3f);
        } else if (c1 > LAST_HIGH_SURROGATE) {
            // low surrogate without high surrogate
            buf[ptr++] = 0xef;
            buf[ptr++] = 0xbf;
            buf[ptr++] = 0xbd;
        } else {
            const c2 = v.charCodeAt(++i);
            if (i >= strLen || c2 < FIRST_LOW_SURROGATE || c2 > LAST_LOW_SURROGATE) {
                // high surrogate not followed by low surrogate
                buf[ptr++] = 0xef;
                buf[ptr++] = 0xbf;
                buf[ptr++] = 0xbd;
                // Step back so the code unit just inspected is processed on its own.
                i--;
                continue;
            }
            // Combine the surrogate pair into a single astral code point.
            const c = ((c1 & 0x3ff) << 10) + (c2 & 0x3ff) + 0x1_0000;
            buf[ptr++] = 0xf0 | (c >> 18);
            buf[ptr++] = 0x80 | ((c >> 12) & 0x3f);
            buf[ptr++] = 0x80 | ((c >> 6) & 0x3f);
            buf[ptr++] = 0x80 | (c & 0x3f);
        }
    }
    return ptr - offset;
}

/* c8 ignore next 1 */
let TEXT_ENCODER: TextEncoder | null;

/**
 * Encode string into utf-8 using `TextEncoder.encodeInto`.
 * Provided `buf` MUST have enough space.
 *
 * Must only be called when `TEXT_ENCODER` has been set by {@link resetEnv}.
 *
 * @param v - The string to encode.
 * @param buf - Destination buffer.
 * @param offset - Index in `buf` at which writing starts.
 * @returns The number of bytes written.
 */
export function nativeEncodeInto(v: string, buf: Uint8Array, offset: number): number {
    const encoded = TEXT_ENCODER!.encodeInto(v, buf.subarray(offset));
    return encoded.written;
}

/**
 * Encode string into utf-8.
 * Provided `buf` MUST have enough space.
 *
 * Strings shorter than 55 UTF-16 code units go through {@link jsEncodeInto};
 * longer ones go through {@link nativeEncodeInto}.
 *
 * @param v - The string to encode.
 * @param buf - Destination buffer.
 * @param offset - Index in `buf` at which writing starts.
 * @returns The number of bytes written.
 */
export function myEncodeInto(v: string, buf: Uint8Array, offset: number): number {
    if (v.length < 55) {
        return jsEncodeInto(v, buf, offset);
    }
    return nativeEncodeInto(v, buf, offset);
}

/**
 * Encode string into utf-8.
 * Provided `buf` MUST have enough space.
 * Bound to an implementation by {@link resetEnv}.
 */
export let encodeInto: (v: string, buf: Uint8Array, offset: number) => number;

/**
 * Encode string into utf-8.
 * Bound to an implementation by {@link resetEnv}.
 */
export let encode: (v: string) => Uint8Array;

/**
 * Reset environment: re-detect `TextEncoder` and `Buffer` and rebind
 * `stringByteLength`, `encodeInto` and `encode` accordingly.
 */
export function resetEnv(): void {
    TEXT_ENCODER = typeof TextEncoder === 'function' ? new TextEncoder() : null;

    stringByteLength =
        typeof Buffer === 'function' && typeof Buffer.byteLength === 'function'
            ? nodeStringByteLength
            : jsStringByteLength;

    encodeInto = typeof TEXT_ENCODER?.encodeInto === 'function' ? myEncodeInto : jsEncodeInto;

    encode =
        typeof Buffer === 'function' && typeof Buffer.from === 'function'
            ? (v) => Buffer.from(v, 'utf8')
            : TEXT_ENCODER
              ? (v) => TEXT_ENCODER!.encode(v)
              : (v) => {
                    // Pure-JS fallback: size the buffer exactly, then fill it.
                    const buf = new Uint8Array(stringByteLength(v));
                    jsEncodeInto(v, buf, 0);
                    return buf;
                };
}

resetEnv();