UNPKG

@kayahr/text-encoding

Version:
140 lines 4.38 kB
/*
 * Copyright (C) 2021 Klaus Reimer <k@ailis.de>
 * See LICENSE.md for licensing information.
 */
import { ByteBuffer } from "./ByteBuffer.js";
import { FINISHED } from "./constants.js";
import { getEncoding } from "./Encoding.js";

/**
 * Converts a string to code points.
 *
 * Valid surrogate pairs are combined into a single supplementary code point. Unpaired surrogates (a low surrogate
 * without a preceding high surrogate, or a high surrogate not followed by a low surrogate) are replaced with
 * U+FFFD (REPLACEMENT CHARACTER).
 *
 * @param string - Input string of UTF-16 code units.
 * @returns Code points.
 */
function stringToCodePoints(string) {
    const n = string.length;
    const codePoints = [];
    let i = 0;
    while (i < n) {
        const c = string.charCodeAt(i);
        if (c < 0xD800 || c > 0xDFFF) {
            // Not a surrogate: the code unit is the code point
            codePoints.push(c);
        } else if (c >= 0xDC00) {
            // Low surrogate without a preceding high surrogate
            codePoints.push(0xFFFD);
        } else if (i === n - 1) {
            // High surrogate at the very end of the string
            codePoints.push(0xFFFD);
        } else {
            const d = string.charCodeAt(i + 1);
            if (d >= 0xDC00 && d <= 0xDFFF) {
                // Valid pair: combine the two 10 bit payloads and skip the low surrogate
                const a = c & 0x3FF;
                const b = d & 0x3FF;
                codePoints.push(0x10000 + (a << 10) + b);
                i++;
            } else {
                // High surrogate followed by something else. The following unit is processed in the next iteration
                codePoints.push(0xFFFD);
            }
        }
        i++;
    }
    return codePoints;
}

/**
 * The TextEncoder represents an encoder for a specific text encoding, such as UTF-8, ISO-8859-2, KOI8-R, GBK, etc.
 * An encoder takes a string and emits an array of encoded bytes.
 */
export class TextEncoder {
    /** The encoding returned by `getEncoding` for the label passed to the constructor. */
    enc;

    /** The encoder created from the encoding. Created lazily on first use and then reused by later calls. */
    encoder = null;

    /**
     * Creates a new encoder for the given encoding.
     *
     * @param label - The encoding label. Defaults to UTF-8
     */
    constructor(label = "utf-8") {
        this.enc = getEncoding(label);
    }

    /** @returns The name of the encoding. */
    get encoding() {
        return this.enc.getName();
    }

    /**
     * Encodes the given string and returns the encoded bytes.
     *
     * @param input - The string to encode. Defaults to an empty string.
     * @returns The encoded bytes.
     */
    encode(input = "") {
        // Initialize encoder if not already done
        this.encoder ??= this.enc.createEncoder();

        // Encode the input string. The encoder yields either a single byte or an array of bytes per call
        // until it signals the end of the input with FINISHED
        const inputStream = new ByteBuffer(stringToCodePoints(input));
        const output = [];
        while (true) {
            const result = this.encoder.encode(inputStream);
            if (result === FINISHED) {
                break;
            }
            if (Array.isArray(result)) {
                output.push(...result);
            } else {
                output.push(result);
            }
        }

        // Create and return byte array with encoded string
        return new Uint8Array(output);
    }

    /**
     * Encodes the given string into the given destination array. Encoding stops when the input is exhausted, when
     * the destination is full or when the next encoded byte sequence no longer fits into the remaining space.
     *
     * @param source      - The string to encode.
     * @param destination - The byte array to write the encoded bytes to.
     * @returns Object with `read` (number of UTF-16 code units of `source` which were converted) and `written`
     *          (number of bytes written to `destination`).
     */
    encodeInto(source, destination) {
        // Initialize encoder if not already done
        this.encoder ??= this.enc.createEncoder();

        // Remember how many UTF-16 code units each code point occupies in the source string. A supplementary code
        // point comes from a surrogate pair (two units), everything else (including U+FFFD substituted for a lone
        // surrogate) from exactly one unit. Computed before the array is handed over to the byte buffer so it does
        // not matter what the buffer does with the array.
        const codePoints = stringToCodePoints(source);
        const unitWidths = codePoints.map(codePoint => (codePoint > 0xFFFF ? 2 : 1));
        const inputStream = new ByteBuffer(codePoints);

        let consumed = 0;
        let read = 0;
        let written = 0;
        while (written < destination.byteLength) {
            const result = this.encoder.encode(inputStream);
            if (result === FINISHED) {
                break;
            }
            if (Array.isArray(result)) {
                // Never write a partial byte sequence
                if (result.length + written > destination.byteLength) {
                    break;
                }
                destination.set(result, written);
                written += result.length;
            } else {
                destination[written++] = result;
            }

            // Like the original per-result counter this assumes one code point is consumed per encoder result.
            // Results which are not backed by an input code point do not advance the read counter.
            read += unitWidths[consumed] ?? 0;
            consumed++;
        }

        // Report back number of UTF-16 code units read and bytes written
        return { read, written };
    }
}

/**
 * Creates and returns a new text encoder for the given encoding. When the label is exactly "utf-8" and the runtime
 * provides a global TextEncoder then the built-in text encoder (which only supports utf-8) is returned. Otherwise
 * our own implementation is returned for this specific encoding.
 *
 * @param label - The encoding label. Defaults to "utf-8".
 * @returns The created text encoder.
 */
export function createTextEncoder(label = "utf-8") {
    if (label === "utf-8" && typeof globalThis.TextEncoder === "function") {
        return new globalThis.TextEncoder();
    } else {
        return new TextEncoder(label);
    }
}
//# sourceMappingURL=TextEncoder.js.map