UNPKG

@bare-ts/lib

Version:

TypeScript library for BARE, a compact and simple binary-serialization format

170 lines (169 loc) 6.41 kB
//! Copyright (c) 2022 Victorien Elvinger
//! Licensed under the MIT License (https://mit-license.org/)
import { BareError } from "../core/bare-error.js";
import { check, reserve } from "../core/byte-cursor.js";
import { assert, DEV } from "../util/assert.js";
import {
    INVALID_UTF8_STRING,
    TEXT_DECODER_THRESHOLD,
    TEXT_ENCODER_THRESHOLD,
} from "../util/constants.js";
import { isU32 } from "../util/validator.js";
import { readUnsafeU8FixedArray, writeU8FixedArray } from "./u8-array.js";
import { readUintSafe32, writeUintSafe32 } from "./uint.js";

/**
 * Read a length-prefixed UTF-8 string: the byte length is read with
 * `readUintSafe32`, then that many bytes are decoded.
 *
 * @param bc byte cursor to read from
 * @returns the decoded string
 * @throws {BareError} if the bytes are not valid UTF-8
 */
export function readString(bc) {
    return readFixedString(bc, readUintSafe32(bc));
}

/**
 * Write `x` as a length-prefixed UTF-8 string: the UTF-8 byte length is
 * written with `writeUintSafe32`, followed by the encoded bytes.
 *
 * Strings shorter than `TEXT_ENCODER_THRESHOLD` UTF-16 code units are encoded
 * by the JS encoder of this module; longer ones go through `TextEncoder`.
 *
 * @param bc byte cursor to write to
 * @param x string to write
 */
export function writeString(bc, x) {
    if (x.length < TEXT_ENCODER_THRESHOLD) {
        const byteLen = utf8ByteLength(x);
        writeUintSafe32(bc, byteLen);
        // presumably makes room for `byteLen` more bytes, since `writeUtf8Js`
        // writes into `bc.bytes` without any bounds check of its own
        reserve(bc, byteLen);
        writeUtf8Js(bc, x);
    } else {
        const strBytes = UTF8_ENCODER.encode(x);
        writeUintSafe32(bc, strBytes.length);
        writeU8FixedArray(bc, strBytes);
    }
}

/**
 * Read `byteLen` bytes and decode them as a UTF-8 string (no length prefix).
 *
 * Inputs shorter than `TEXT_DECODER_THRESHOLD` bytes are decoded by the JS
 * decoder of this module; longer ones go through a fatal `TextDecoder`.
 *
 * @param bc byte cursor to read from
 * @param byteLen number of bytes to decode; must be a `u32`
 * @returns the decoded string
 * @throws {BareError} if the bytes are not valid UTF-8
 */
export function readFixedString(bc, byteLen) {
    if (DEV) {
        assert(isU32(byteLen));
    }
    if (byteLen < TEXT_DECODER_THRESHOLD) {
        return readUtf8Js(bc, byteLen);
    }
    try {
        return UTF8_DECODER.decode(readUnsafeU8FixedArray(bc, byteLen));
    } catch (_cause) {
        // Report the decoder failure as a BareError, like the JS decoder does.
        throw new BareError(bc.offset, INVALID_UTF8_STRING);
    }
}

/**
 * Write `x` as UTF-8 bytes without any length prefix.
 *
 * @param bc byte cursor to write to
 * @param x string to write
 */
export function writeFixedString(bc, x) {
    if (x.length < TEXT_ENCODER_THRESHOLD) {
        const byteLen = utf8ByteLength(x);
        reserve(bc, byteLen);
        writeUtf8Js(bc, x);
    } else {
        writeU8FixedArray(bc, UTF8_ENCODER.encode(x));
    }
}

/**
 * Strict UTF-8 decoder written in plain JS.
 *
 * Decodes `byteLen` bytes starting at `bc.offset`. Overlong (non-canonical)
 * encodings, surrogate code points, code points above U+10FFFF, invalid
 * lead/continuation tags and truncated sequences are rejected.
 * `bc.offset` is advanced only when the whole string was decoded.
 *
 * @param bc byte cursor to read from
 * @param byteLen number of bytes to decode
 * @returns the decoded string
 * @throws {BareError} if the bytes are not valid UTF-8
 */
function readUtf8Js(bc, byteLen) {
    // `check` asserts that `byteLen` is a `u32`
    check(bc, byteLen);
    let result = "";
    const bytes = bc.bytes;
    let offset = bc.offset;
    const upperOffset = offset + byteLen;
    while (offset < upperOffset) {
        let codePoint = bytes[offset++];
        if (codePoint > 0x7f) {
            // Stays `true` when no branch below matches, i.e. when the
            // sequence is truncated by the end of the string.
            let malformed = true;
            const byte1 = codePoint;
            if (offset < upperOffset && codePoint < 0xe0) {
                // 110x_xxxx 10xx_xxxx
                const byte2 = bytes[offset++];
                codePoint = ((byte1 & 0x1f) << 6) | (byte2 & 0x3f);
                malformed =
                    codePoint >> 7 === 0 || // non-canonical char
                    byte1 >> 5 !== 0b110 || // invalid tag
                    byte2 >> 6 !== 0b10; // invalid tag
            } else if (offset + 1 < upperOffset && codePoint < 0xf0) {
                // 1110_xxxx 10xx_xxxx 10xx_xxxx
                const byte2 = bytes[offset++];
                const byte3 = bytes[offset++];
                codePoint =
                    ((byte1 & 0xf) << 12) |
                    ((byte2 & 0x3f) << 6) |
                    (byte3 & 0x3f);
                malformed =
                    codePoint >> 11 === 0 || // non-canonical char or missing data
                    codePoint >> 11 === 0x1b || // surrogate char (0xD800 <= codePoint <= 0xDFFF)
                    byte1 >> 4 !== 0b1110 || // invalid tag
                    byte2 >> 6 !== 0b10 || // invalid tag
                    byte3 >> 6 !== 0b10; // invalid tag
            } else if (offset + 2 < upperOffset) {
                // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
                const byte2 = bytes[offset++];
                const byte3 = bytes[offset++];
                const byte4 = bytes[offset++];
                codePoint =
                    ((byte1 & 0x7) << 18) |
                    ((byte2 & 0x3f) << 12) |
                    ((byte3 & 0x3f) << 6) |
                    (byte4 & 0x3f);
                malformed =
                    codePoint >> 16 === 0 || // non-canonical char or missing data
                    codePoint > 0x10ffff || // too large code point
                    byte1 >> 3 !== 0b11110 || // invalid tag
                    byte2 >> 6 !== 0b10 || // invalid tag
                    byte3 >> 6 !== 0b10 || // invalid tag
                    byte4 >> 6 !== 0b10; // invalid tag
            }
            if (malformed) {
                // `bc.offset` has not been advanced yet: the error points at
                // the start of the string.
                throw new BareError(bc.offset, INVALID_UTF8_STRING);
            }
        }
        result += String.fromCodePoint(codePoint);
    }
    bc.offset = offset;
    return result;
}

/**
 * UTF-8 encoder written in plain JS.
 *
 * Writes the UTF-8 encoding of `s` into `bc.bytes` starting at `bc.offset`
 * and advances `bc.offset` past the written bytes. No bounds check is done
 * here: the callers in this module call `reserve` with `utf8ByteLength(s)`
 * first.
 *
 * Lone surrogates are written as U+FFFD (REPLACEMENT CHARACTER), which is
 * what `TextEncoder` does on the long-string path. Encoding the surrogate
 * itself would produce bytes that `readUtf8Js` and `TextDecoder` reject.
 * U+FFFD and a surrogate both take 3 bytes, so `utf8ByteLength` still
 * reports the exact number of bytes written.
 *
 * @param bc byte cursor to write to
 * @param s string to encode
 */
function writeUtf8Js(bc, s) {
    const bytes = bc.bytes;
    let offset = bc.offset;
    let i = 0;
    while (i < s.length) {
        let codePoint = s.codePointAt(i++); // i is a valid index
        if (codePoint >> 11 === 0x1b) {
            // `codePointAt` yields a value in 0xD800..0xDFFF only for a lone
            // surrogate; a valid pair yields a code point above 0xFFFF.
            codePoint = 0xfffd;
        }
        if (codePoint < 0x80) {
            bytes[offset++] = codePoint;
        } else {
            if (codePoint < 0x800) {
                bytes[offset++] = 0xc0 | (codePoint >> 6);
            } else {
                if (codePoint < 0x10_000) {
                    bytes[offset++] = 0xe0 | (codePoint >> 12);
                } else {
                    bytes[offset++] = 0xf0 | (codePoint >> 18);
                    bytes[offset++] = 0x80 | ((codePoint >> 12) & 0x3f);
                    i++; // surrogate pair encoded as two ucs2 chars
                }
                bytes[offset++] = 0x80 | ((codePoint >> 6) & 0x3f);
            }
            bytes[offset++] = 0x80 | (codePoint & 0x3f);
        }
    }
    bc.offset = offset;
}

/**
 * Number of bytes needed to encode `s` in UTF-8.
 *
 * Starts from one byte per UTF-16 code unit and adds the extra bytes of
 * multi-byte code points. A surrogate pair spans two code units (already
 * counted as 2) and adds 2 more, for 4 bytes in total.
 *
 * @param s string to measure
 * @returns UTF-8 byte length of `s`
 */
function utf8ByteLength(s) {
    let result = s.length;
    for (let i = 0; i < s.length; i++) {
        const codePoint = s.codePointAt(i); // i is a valid index
        if (codePoint > 0x7f) {
            result++;
            if (codePoint > 0x7ff) {
                result++;
                if (codePoint > 0xff_ff) {
                    i++; // surrogate pair encoded as two ucs2 chars
                }
            }
        }
    }
    return result;
}

/**
 * UTF-8 decoding and encoding using API that is supported in Node >= 12 and
 * modern browsers:
 * https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder/encode
 * https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/decode
 *
 * If you're running in an environment where it's not available,
 * please use a polyfill, such as:
 * https://github.com/anonyco/FastestSmallestTextEncoderDecoder
 */
const UTF8_DECODER = /* @__PURE__ */ new TextDecoder("utf-8", { fatal: true });
const UTF8_ENCODER = /* @__PURE__ */ new TextEncoder();