UNPKG

functionalscript

Version:

FunctionalScript is a purely functional subset of JavaScript

131 lines (130 loc) 5.34 kB
import { type List, type Thunk } from '../../types/list/module.f.ts';

/**
 * An unsigned 16-bit value holding a single UTF-16 code unit.
 *
 * This is a plain `number` alias; the 16-bit range is a convention and is
 * not enforced by the type system.
 */
export type U16 = number;

/**
 * Bit-width notes kept from the original source:
 *
 * [0, 0x10_FFFF]: 16+5 = 21 bits
 *
 * 121_0000_0000: 16+16+9 = 41 bits
 *
 * NOTE(review): the second line is reproduced verbatim; what it refers to is
 * not evident from this declaration file — confirm against the implementation.
 */

/**
 * A Unicode code point.
 * Range: 0x0000 to 0x10_FFFF (21 bits).
 *
 * This is a plain `number` alias; the range is a convention and is not
 * enforced by the type system.
 */
export type CodePoint = number;

/**
 * Converts a list of Unicode code points to UTF-16 code units.
 *
 * NOTE(review): the comment previously attached to this declaration described
 * the reverse conversion (UTF-16 to code points) and has been moved to
 * `toCodePointList`. The description here follows from the declared signature
 * and the function's name; the implementation is not visible in this file.
 *
 * Presumably code points in the Basic Multilingual Plane (BMP) [0x0000–0xFFFF]
 * produce a single `U16`, and code points in the supplementary planes
 * [0x10000–0x10FFFF] produce a surrogate pair — confirm against the
 * implementation, including how out-of-range or surrogate-range inputs are
 * handled.
 *
 * @param input - A list of Unicode code points (`CodePoint`) to encode.
 * @returns A `Thunk<U16>` producing the UTF-16 code units. `Thunk` comes from
 * the list module; presumably it is a deferred (lazily evaluated) list —
 * TODO confirm.
 *
 * @example
 *
 * ```ts
 * const codePoints: List<CodePoint> = [
 *     0x0041,  // 'A' (BMP)
 *     0x1F600, // 😀 (supplementary plane)
 * ]
 * // Expected to yield 0x0041, 0xD83D, 0xDE00 — verify against the implementation.
 * const utf16 = fromCodePointList(codePoints)
 * ```
 */
export declare const fromCodePointList: (input: List<CodePoint>) => Thunk<U16>;

/**
 * Converts a list of UTF-16 code units to a list of Unicode code points (CodePoint).
 *
 * This function processes each UTF-16 code unit, decoding them into their
 * corresponding Unicode code points. The input list of `U16` values may
 * represent characters in the Basic Multilingual Plane (BMP) or supplementary
 * planes, with surrogate pairs handled correctly. The function also handles
 * EOF (`null`).
 *
 * According to the original documentation, this function handles:
 * 1. Single U16 values in the Basic Multilingual Plane (BMP) [0x0000–0xFFFF].
 * 2. Surrogate pairs representing code points in the Supplementary Plane [0x10000–0x10FFFF].
 * 3. Invalid input sequences by applying an error mask to the resulting code point.
 *
 * NOTE(review): `errorMask` is referenced by the original documentation but
 * is not declared in this file — confirm where it is exported from and its
 * value.
 *
 * @param input - A list of UTF-16 code units (`U16`), possibly containing surrogate pairs.
 * @returns A list of Unicode code points (`CodePoint`), one for each valid code
 * unit or surrogate pair. Invalid sequences are marked with the `errorMask`.
 *
 * @example
 *
 * ```ts
 * const exampleUtf16: List<U16> = [
 *     0x0041,         // 'A' (BMP, single U16)
 *     0xD83D, 0xDE00, // 😀 (Emoji, surrogate pair)
 *     0xD800,         // Unpaired high surrogate
 *     0xDC00,         // Unpaired low surrogate
 * ]
 *
 * const codePoints = toCodePointList(exampleUtf16)
 * codePoints.forEach((codePoint) => {
 *     if (codePoint & errorMask) {
 *         console.log(`Invalid sequence detected: ${codePoint.toString(16).toUpperCase()}`)
 *     } else {
 *         console.log(`Code Point: U+${codePoint.toString(16).toUpperCase()}`)
 *     }
 * })
 * ```
 *
 * @example
 *
 * ```ts
 * const utf16List: List<U16> = [0x0041, 0xD83D, 0xDE00] // 'A' and 😀 (surrogate pair)
 * const codePoints = toCodePointList(utf16List)
 * ```
 */
export declare const toCodePointList: (input: List<U16>) => List<CodePoint>;

/**
 * Converts a string to a list of UTF-16 code units (U16).
 *
 * This function processes each character in the input string and converts it
 * to its corresponding UTF-16 code unit(s). Characters in the Basic
 * Multilingual Plane (BMP) will produce a single `U16`, while supplementary
 * plane characters (those requiring surrogate pairs) will produce two `U16`
 * values.
 *
 * @param s - The input string to convert to UTF-16 code units.
 * @returns A list of UTF-16 code units (`U16`) representing the string.
 *
 * @example
 *
 * ```js
 * const inputString = "Hello, 😀"
 * const utf16List = stringToList(inputString)
 * ```
 */
export declare const stringToList: (s: string) => List<U16>;

/**
 * Converts a string to a list of Unicode code points (CodePoint).
 *
 * This function first converts the string to a list of UTF-16 code units
 * (U16) using `stringToList`, then converts the UTF-16 code units to Unicode
 * code points using `toCodePointList`. This is useful for handling Unicode
 * characters, including supplementary characters represented by surrogate
 * pairs in UTF-16.
 *
 * @param input - The input string to convert.
 * @returns A list of Unicode code points (`CodePoint`) corresponding to the
 * characters in the string.
 *
 * @example
 *
 * ```js
 * const inputString = "Hello, 😀"
 * const codePoints = stringToCodePointList(inputString)
 * ```
 */
export declare const stringToCodePointList: (input: string) => List<CodePoint>;

/**
 * Converts a list of UTF-16 code units (U16) to a string.
 *
 * This function takes a list of `U16` values (UTF-16 code units) and
 * reconstructs the original string by mapping each code unit back to its
 * character using `String.fromCharCode`. The resulting characters are
 * concatenated to form the final string.
 *
 * @param input - A list of UTF-16 code units (`U16`).
 * @returns A string representing the characters encoded by the input UTF-16
 * code units.
 *
 * @example
 *
 * ```ts
 * const utf16List: List<U16> = [0x0041, 0x0042, 0x0043] // 'ABC'
 * const outputString = listToString(utf16List)
 * ```
 */
export declare const listToString: (input: List<U16>) => string;

/**
 * Converts a list of Unicode code points (CodePoint) to a string.
 *
 * This function first converts the list of Unicode code points to a list of
 * UTF-16 code units using `fromCodePointList`, then uses `listToString` to
 * reconstruct the string from the UTF-16 code units.
 *
 * @param input - A list of Unicode code points (`CodePoint`).
 * @returns A string representing the characters encoded by the input code
 * points.
 *
 * @example
 *
 * ```ts
 * const codePoints: List<CodePoint> = [0x48, 0x65, 0x6C, 0x6C, 0x6F]
 * const outputString = codePointListToString(codePoints)
 * ```
 */
export declare const codePointListToString: (input: List<CodePoint>) => string;