@msgpack/msgpack
Version:
MessagePack for ECMA-262/JavaScript/TypeScript
177 lines • 6.13 kB
JavaScript
// Set the `__esModule` flag on the CommonJS exports object (the conventional
// marker for CommonJS/ES-module interop).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. The functions are declared further down in the
// file; function declarations are hoisted, so the names are already bound here.
// Counting / encoding (string -> UTF-8 bytes):
exports.utf8Count = utf8Count;
exports.utf8EncodeJs = utf8EncodeJs;
exports.utf8EncodeTE = utf8EncodeTE;
exports.utf8Encode = utf8Encode;
// Decoding (UTF-8 bytes -> string):
exports.utf8DecodeJs = utf8DecodeJs;
exports.utf8DecodeTD = utf8DecodeTD;
exports.utf8Decode = utf8Decode;
/**
 * Returns the number of bytes `str` occupies when encoded as UTF-8.
 *
 * The string is walked one UTF-16 code unit at a time. A high surrogate that
 * is immediately followed by a low surrogate is counted once, as a 4-byte
 * sequence. Any other surrogate code unit (unpaired) is counted as 3 bytes.
 *
 * @param {string} str - The string to measure.
 * @returns {number} The UTF-8 byte length of `str`.
 */
function utf8Count(str) {
    const unitCount = str.length;
    let total = 0;
    for (let index = 0; index < unitCount; index++) {
        const unit = str.charCodeAt(index);
        if (unit < 0x80) {
            // U+0000..U+007F: single byte
            total += 1;
            continue;
        }
        if (unit < 0x800) {
            // U+0080..U+07FF: two bytes
            total += 2;
            continue;
        }
        const isHighSurrogate = unit >= 0xd800 && unit <= 0xdbff;
        if (isHighSurrogate && index + 1 < unitCount) {
            const next = str.charCodeAt(index + 1);
            const isLowSurrogate = next >= 0xdc00 && next <= 0xdfff;
            if (isLowSurrogate) {
                // A complete surrogate pair is one supplementary code point:
                // consume the low half as well and count four bytes.
                index++;
                total += 4;
                continue;
            }
        }
        // Everything else in the BMP, including unpaired surrogates: three bytes
        total += 3;
    }
    return total;
}
/**
 * Encodes `str` as UTF-8 into `output`, starting at index `outputOffset`.
 *
 * This is the pure-JavaScript encoder. It walks the string one UTF-16 code
 * unit at a time and combines a high surrogate with the low surrogate that
 * follows it into a single 4-byte sequence.
 *
 * An unpaired surrogate (a high surrogate without a following low surrogate,
 * or a low surrogate on its own) is written as U+FFFD REPLACEMENT CHARACTER
 * (EF BF BD). Encoding the surrogate itself would produce bytes that are not
 * well-formed UTF-8 and would differ from what `TextEncoder` emits for the
 * same input. The replacement is still 3 bytes, so the byte length computed
 * by `utf8Count` remains valid.
 *
 * No bounds checking is done: the caller must make sure `output` has room for
 * the encoded bytes from `outputOffset` onwards.
 *
 * @param {string} str - The string to encode.
 * @param {Uint8Array} output - Destination byte array (written by index).
 * @param {number} outputOffset - Index in `output` of the first byte written.
 * @returns {void}
 */
function utf8EncodeJs(str, output, outputOffset) {
    const strLength = str.length;
    let offset = outputOffset;
    let pos = 0;
    while (pos < strLength) {
        let value = str.charCodeAt(pos++);
        if ((value & 0xffffff80) === 0) {
            // 1 byte: the code unit is the byte; no trailing byte follows
            output[offset++] = value;
            continue;
        }
        if ((value & 0xfffff800) === 0) {
            // 2 bytes: leading byte here, trailing byte written after the branch
            output[offset++] = ((value >> 6) & 0x1f) | 0xc0;
        }
        else {
            if ((value & 0xf800) === 0xd800) {
                // Surrogate code unit (0xD800..0xDFFF). Only a high surrogate
                // can start a pair, so only then do we peek at the next unit.
                const extra = value <= 0xdbff && pos < strLength ? str.charCodeAt(pos) : 0;
                if ((extra & 0xfc00) === 0xdc00) {
                    // Valid pair: consume the low surrogate and build the
                    // supplementary code point.
                    ++pos;
                    value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
                }
                else {
                    // Unpaired surrogate: substitute U+FFFD
                    value = 0xfffd;
                }
            }
            if ((value & 0xffff0000) === 0) {
                // 3 bytes
                output[offset++] = ((value >> 12) & 0x0f) | 0xe0;
                output[offset++] = ((value >> 6) & 0x3f) | 0x80;
            }
            else {
                // 4 bytes
                output[offset++] = ((value >> 18) & 0x07) | 0xf0;
                output[offset++] = ((value >> 12) & 0x3f) | 0x80;
                output[offset++] = ((value >> 6) & 0x3f) | 0x80;
            }
        }
        // Final continuation byte shared by the 2-, 3- and 4-byte forms
        output[offset++] = (value & 0x3f) | 0x80;
    }
}
// TextEncoder and TextDecoder are standardized in the WHATWG Encoding Standard:
// https://encoding.spec.whatwg.org/
// They are available in all modern browsers:
// https://caniuse.com/textencoder
// and in Node.js since v12 LTS as well:
// https://nodejs.org/api/globals.html#textencoder
// A single TextEncoder instance, created once at module load and reused by
// every utf8EncodeTE call.
const sharedTextEncoder = new TextEncoder();
// String length (in UTF-16 code units) above which utf8Encode uses TextEncoder.
// This threshold should be determined by benchmarking, which might vary in engines and input data.
// Run `npx ts-node benchmark/encode-string.ts` for details.
const TEXT_ENCODER_THRESHOLD = 50;
/**
 * Encodes `str` as UTF-8 into `output`, starting at `outputOffset`, using the
 * shared TextEncoder.
 *
 * @param {string} str - The string to encode.
 * @param {Uint8Array} output - Destination buffer; must support `subarray`.
 * @param {number} outputOffset - Index in `output` of the first byte written.
 * @returns {void} The result object of `encodeInto` is discarded.
 */
function utf8EncodeTE(str, output, outputOffset) {
    // A subarray shares memory with `output`, so writing into the view
    // writes into `output` itself from `outputOffset` onwards.
    const destination = output.subarray(outputOffset);
    sharedTextEncoder.encodeInto(str, destination);
}
/**
 * Encodes `str` as UTF-8 into `output` at `outputOffset`, choosing the
 * implementation by string length: strings longer than
 * TEXT_ENCODER_THRESHOLD code units go through utf8EncodeTE, all others
 * through utf8EncodeJs.
 *
 * @param {string} str - The string to encode.
 * @param {Uint8Array} output - Destination byte buffer.
 * @param {number} outputOffset - Index in `output` of the first byte written.
 * @returns {void}
 */
function utf8Encode(str, output, outputOffset) {
    const encodeImpl = str.length > TEXT_ENCODER_THRESHOLD ? utf8EncodeTE : utf8EncodeJs;
    encodeImpl(str, output, outputOffset);
}
const CHUNK_SIZE = 4096;
function utf8DecodeJs(bytes, inputOffset, byteLength) {
let offset = inputOffset;
const end = offset + byteLength;
const units = [];
let result = "";
while (offset < end) {
const byte1 = bytes[offset++];
if ((byte1 & 0x80) === 0) {
// 1 byte
units.push(byte1);
}
else if ((byte1 & 0xe0) === 0xc0) {
// 2 bytes
const byte2 = bytes[offset++] & 0x3f;
units.push(((byte1 & 0x1f) << 6) | byte2);
}
else if ((byte1 & 0xf0) === 0xe0) {
// 3 bytes
const byte2 = bytes[offset++] & 0x3f;
const byte3 = bytes[offset++] & 0x3f;
units.push(((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3);
}
else if ((byte1 & 0xf8) === 0xf0) {
// 4 bytes
const byte2 = bytes[offset++] & 0x3f;
const byte3 = bytes[offset++] & 0x3f;
const byte4 = bytes[offset++] & 0x3f;
let unit = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4;
if (unit > 0xffff) {
unit -= 0x10000;
units.push(((unit >>> 10) & 0x3ff) | 0xd800);
unit = 0xdc00 | (unit & 0x3ff);
}
units.push(unit);
}
else {
units.push(byte1);
}
if (units.length >= CHUNK_SIZE) {
result += String.fromCharCode(...units);
units.length = 0;
}
}
if (units.length > 0) {
result += String.fromCharCode(...units);
}
return result;
}
// A single TextDecoder instance (default options), created once at module
// load and reused by every utf8DecodeTD call.
const sharedTextDecoder = new TextDecoder();
// Byte length above which utf8Decode uses TextDecoder.
// This threshold should be determined by benchmarking, which might vary in engines and input data.
// Run `npx ts-node benchmark/decode-string.ts` for details.
const TEXT_DECODER_THRESHOLD = 200;
/**
 * Decodes `byteLength` bytes of `bytes`, starting at `inputOffset`, into a
 * string using the shared TextDecoder.
 *
 * @param {Uint8Array} bytes - Source buffer; must support `subarray`.
 * @param {number} inputOffset - Index of the first byte to decode.
 * @param {number} byteLength - Number of bytes to decode.
 * @returns {string} The decoded string.
 */
function utf8DecodeTD(bytes, inputOffset, byteLength) {
    // `subarray` creates a view over the same memory; no bytes are copied here.
    return sharedTextDecoder.decode(bytes.subarray(inputOffset, inputOffset + byteLength));
}
/**
 * Decodes a UTF-8 byte range into a string, choosing the implementation by
 * size: ranges longer than TEXT_DECODER_THRESHOLD bytes go through
 * utf8DecodeTD, all others through utf8DecodeJs.
 *
 * @param {Uint8Array} bytes - Source buffer.
 * @param {number} inputOffset - Index of the first byte to decode.
 * @param {number} byteLength - Number of bytes to decode.
 * @returns {string} The decoded string.
 */
function utf8Decode(bytes, inputOffset, byteLength) {
    const decodeImpl = byteLength > TEXT_DECODER_THRESHOLD ? utf8DecodeTD : utf8DecodeJs;
    return decodeImpl(bytes, inputOffset, byteLength);
}
//# sourceMappingURL=utf8.cjs.map
;