@cloudpss/ubjson
Version:
222 lines (210 loc) • 6.94 kB
text/typescript
/** Number of buffered UTF-16 code units that triggers a flush into the result string. */
const CHUNK_SIZE = 0x1000;
/** U+FFFD REPLACEMENT CHARACTER, substituted for input that cannot be decoded. */
const REPLACE_CHAR = 0xfffd;
const { fromCharCode } = String;
/**
 * Decode the UTF-8 bytes in `bytes[begin, end)` into a string using plain JavaScript.
 *
 * Code units are collected in an array and flushed through `String.fromCharCode`
 * every `CHUNK_SIZE` units, so the spread call never receives an unbounded argument list.
 *
 * Malformed input handling:
 * - a byte that cannot start a sequence (`10xxxxxx` or `11111xxx`) becomes U+FFFD;
 * - a multi-byte sequence cut off by `end` becomes a single U+FFFD and decoding stops,
 *   so bytes outside the requested range are never read;
 * - continuation bytes are masked with `0x3f` without checking their top bits, and
 *   overlong or out-of-range code points are not rejected.
 *
 * @param bytes Buffer holding the encoded text
 * @param begin Index of the first byte to decode (inclusive)
 * @param end Index just past the last byte to decode (exclusive)
 * @returns The decoded string; empty when `end <= begin`
 */
export function jsDecode(bytes: Uint8Array, begin: number, end: number): string {
    let offset = begin;

    const units: number[] = [];
    let result = '';
    while (offset < end) {
        const byte1 = bytes[offset++]!;
        if ((byte1 & 0x80) === 0) {
            // 1 byte
            units.push(byte1);
        } else if ((byte1 & 0xe0) === 0xc0) {
            // 2 bytes
            if (end - offset < 1) {
                // Sequence is cut off by `end`: do not read past the range
                units.push(REPLACE_CHAR);
                break;
            }
            const byte2 = bytes[offset++]! & 0x3f;
            units.push(((byte1 & 0x1f) << 6) | byte2);
        } else if ((byte1 & 0xf0) === 0xe0) {
            // 3 bytes
            if (end - offset < 2) {
                units.push(REPLACE_CHAR);
                break;
            }
            const byte2 = bytes[offset++]! & 0x3f;
            const byte3 = bytes[offset++]! & 0x3f;
            // A 3-byte lead carries 4 payload bits
            units.push(((byte1 & 0x0f) << 12) | (byte2 << 6) | byte3);
        } else if ((byte1 & 0xf8) === 0xf0) {
            // 4 bytes
            if (end - offset < 3) {
                units.push(REPLACE_CHAR);
                break;
            }
            const byte2 = bytes[offset++]! & 0x3f;
            const byte3 = bytes[offset++]! & 0x3f;
            const byte4 = bytes[offset++]! & 0x3f;
            let unit = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4;
            if (unit > 0xffff) {
                // Outside the BMP: split into a high/low surrogate pair
                unit -= 0x1_0000;
                units.push(((unit >>> 10) & 0x3ff) | 0xd800);
                unit = 0xdc00 | (unit & 0x3ff);
            }
            units.push(unit);
        } else {
            // Not a valid lead byte
            units.push(REPLACE_CHAR);
        }

        if (units.length >= CHUNK_SIZE) {
            result += fromCharCode(...units);
            units.length = 0;
        }
    }

    if (units.length > 0) {
        result += fromCharCode(...units);
    }

    return result;
}
/**
 * Decode `length` bytes starting at `buf[begin]` as ASCII.
 *
 * Code units are flushed through `String.fromCharCode` in fixed-size chunks, because
 * spreading an arbitrarily long array into a single call can exceed the engine's
 * argument limit and throw a `RangeError`.
 *
 * @param buf Buffer holding the encoded text
 * @param begin Index of the first byte to decode
 * @param length Number of bytes to decode
 * @returns The decoded string, or `undefined` as soon as a byte with the high bit set
 *          (non-ASCII) is found
 */
export function longStringInJS(buf: Uint8Array, begin: number, length: number): string | undefined {
    // Upper bound on the number of arguments passed to one fromCharCode call
    const chunkSize = 0x1000;
    const end = begin + length;
    const units: number[] = [];
    let result = '';
    for (let offset = begin; offset < end; offset++) {
        const byte = buf[offset]!;
        if (byte & 0x80) {
            return undefined;
        }
        units.push(byte);
        if (units.length >= chunkSize) {
            result += fromCharCode(...units);
            units.length = 0;
        }
    }
    return units.length > 0 ? result + fromCharCode(...units) : result;
}
/**
 * Decode a short ASCII string of at most 15 bytes starting at `buf[begin]`.
 *
 * The byte reads and high-bit checks are written out by hand for every length
 * (presumably for speed on very short strings), so the structure is kept as is.
 *
 * @param buf Buffer holding the encoded text
 * @param begin Index of the first byte to decode
 * @param length Number of bytes to decode; values above 15 are not supported
 * @returns The decoded string; `''` when `length <= 0`; `undefined` when any byte has
 *          the high bit set (non-ASCII) or when `length` is greater than 15, in which
 *          case the caller must fall back to another decoder
 */
export function shortStringInJS(buf: Uint8Array, begin: number, length: number): string | undefined {
    if (length < 4) {
        if (length < 2) {
            // A non-positive length covers no bytes at all
            if (length <= 0) return '';
            const a = buf[begin]!;
            if ((a & 0x80) > 0) {
                return undefined;
            }
            return fromCharCode(a);
        }
        const a = buf[begin++]!;
        const b = buf[begin++]!;
        if ((a & 0x80) > 0 || (b & 0x80) > 0) {
            return undefined;
        }
        if (length < 3) return fromCharCode(a, b);
        const c = buf[begin++]!;
        if ((c & 0x80) > 0) {
            return undefined;
        }
        return fromCharCode(a, b, c);
    }

    const a = buf[begin++]!;
    const b = buf[begin++]!;
    const c = buf[begin++]!;
    const d = buf[begin++]!;
    if ((a & 0x80) > 0 || (b & 0x80) > 0 || (c & 0x80) > 0 || (d & 0x80) > 0) {
        return undefined;
    }
    if (length < 6) {
        if (length === 4) return fromCharCode(a, b, c, d);
        const e = buf[begin++]!;
        if ((e & 0x80) > 0) {
            return undefined;
        }
        return fromCharCode(a, b, c, d, e);
    }

    const e = buf[begin++]!;
    const f = buf[begin++]!;
    if ((e & 0x80) > 0 || (f & 0x80) > 0) {
        return undefined;
    }
    if (length < 8) {
        if (length < 7) return fromCharCode(a, b, c, d, e, f);
        const g = buf[begin++]!;
        if ((g & 0x80) > 0) {
            return undefined;
        }
        return fromCharCode(a, b, c, d, e, f, g);
    }

    const g = buf[begin++]!;
    const h = buf[begin++]!;
    if ((g & 0x80) > 0 || (h & 0x80) > 0) {
        return undefined;
    }
    if (length < 10) {
        if (length === 8) return fromCharCode(a, b, c, d, e, f, g, h);
        const i = buf[begin++]!;
        if ((i & 0x80) > 0) {
            return undefined;
        }
        return fromCharCode(a, b, c, d, e, f, g, h, i);
    }

    const i = buf[begin++]!;
    const j = buf[begin++]!;
    if ((i & 0x80) > 0 || (j & 0x80) > 0) {
        return undefined;
    }
    if (length < 12) {
        if (length < 11) return fromCharCode(a, b, c, d, e, f, g, h, i, j);
        const k = buf[begin++]!;
        if ((k & 0x80) > 0) {
            return undefined;
        }
        return fromCharCode(a, b, c, d, e, f, g, h, i, j, k);
    }

    const k = buf[begin++]!;
    const l = buf[begin++]!;
    if ((k & 0x80) > 0 || (l & 0x80) > 0) {
        return undefined;
    }
    if (length < 14) {
        if (length === 12) return fromCharCode(a, b, c, d, e, f, g, h, i, j, k, l);
        const m = buf[begin++]!;
        if ((m & 0x80) > 0) {
            return undefined;
        }
        return fromCharCode(a, b, c, d, e, f, g, h, i, j, k, l, m);
    }

    const m = buf[begin++]!;
    const n = buf[begin++]!;
    if ((m & 0x80) > 0 || (n & 0x80) > 0) {
        return undefined;
    }
    if (length < 15) return fromCharCode(a, b, c, d, e, f, g, h, i, j, k, l, m, n);
    // Only 15 bytes are unrolled: report "not decoded" rather than silently truncating
    if (length > 15) return undefined;
    const o = buf[begin++]!;
    if ((o & 0x80) > 0) {
        return undefined;
    }
    return fromCharCode(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o);
}
let TEXT_DECODER: TextDecoder | null;
/**
 * Decode the UTF-8 bytes in `data[begin, end)` with the shared `TextDecoder` instance.
 *
 * `TEXT_DECODER` must be non-null when this is called; otherwise the call throws.
 */
export function nativeDecode(data: Uint8Array, begin: number, end: number): string {
    // subarray creates a view over the same memory, not a copy
    const view = data.subarray(begin, end);
    return TEXT_DECODER!.decode(view);
}
/** Call signature of the `utf8Slice` method */
type Utf8Slice = (this: Uint8Array, start: number, end: number) => string;
/** `Buffer` extended with the undocumented `utf8Slice` method */
type BufferUndocumented = Buffer & {
    utf8Slice: Utf8Slice;
};
let utf8Slice: Utf8Slice | null;
/**
 * Decode the UTF-8 bytes in `data[begin, end)` by invoking `utf8Slice` with `data` as `this`.
 *
 * `utf8Slice` must be non-null when this is called; otherwise the call throws.
 */
export function nodeDecode(data: Uint8Array, begin: number, end: number): string {
    const slice = utf8Slice!;
    return slice.call(data, begin, end);
}
/** Decoder applied to strings at or above the threshold; starts out as `nativeDecode`. */
let longDecode: (data: Uint8Array, begin: number, end: number) => string = nativeDecode;
/**
 * Default decoder threshold, in bytes
 * @see /benchmark/string-decoder.js
 */
const DEFAULT_TEXT_DECODER_THRESHOLD = 16;
/**
 * Decode the UTF-8 bytes in `data[begin, end)` into a string, without caching.
 *
 * Inputs shorter than 16 bytes first try the ASCII-only fast path; if that reports
 * non-ASCII content, short inputs go through `jsDecode`, and everything at or above
 * the threshold goes through `longDecode`.
 */
export function decode(data: Uint8Array, begin: number, end: number): string {
    const size = end - begin;
    const ascii = size < 16 ? shortStringInJS(data, begin, size) : undefined;
    if (ascii != null) return ascii;

    if (size >= DEFAULT_TEXT_DECODER_THRESHOLD) {
        // Use the system decoder
        return longDecode(data, begin, end);
    }
    // Only small strings are worth optimizing.
    // A variant that tried `longStringInJS` for lengths below 32 is disabled:
    // if (length < 32) {
    //     const result = longStringInJS(data, begin, length);
    //     if (result != null) return result;
    // }
    return jsDecode(data, begin, end);
}
/**
 * Re-detect the runtime environment and pick the decoder used for long strings.
 *
 * Preference order: `nodeDecode` when `Buffer.prototype.utf8Slice` is a function,
 * then `nativeDecode` when `TextDecoder` exists, otherwise `jsDecode`.
 */
export function resetEnv(): void {
    if (typeof TextDecoder == 'function') {
        TEXT_DECODER = new TextDecoder('utf-8', { ignoreBOM: true, fatal: false });
    } else {
        TEXT_DECODER = null;
    }

    utf8Slice = typeof Buffer == 'function' ? (Buffer.prototype as BufferUndocumented).utf8Slice : null;

    if (typeof utf8Slice == 'function') {
        longDecode = nodeDecode;
    } else if (TEXT_DECODER) {
        longDecode = nativeDecode;
    } else {
        longDecode = jsDecode;
    }
}
// Run once at module load so the decoders are ready before first use
resetEnv();