/*
 * functionalscript
 * Version:
 * FunctionalScript is a purely functional subset of JavaScript
 * 131 lines (130 loc) • 5.34 kB
 * TypeScript
 */
import { type List, type Thunk } from '../../types/list/module.f.ts';
/**
 * Represents a single UTF-16 code unit (one 16-bit word).
 *
 * This is a plain alias of `number`, so the unsigned 16-bit range
 * [0x0000–0xFFFF] is a convention and is not enforced by the type system.
 */
export type U16 = number;
/**
* [0, 0x10_FFFF]: 16+5 = 21 bits
*
* 121_0000_0000: 16+16+9 = 41 bits
*/
/**
 * Represents a Unicode code point.
 *
 * Valid code points range from 0x0000 to 0x10_FFFF (21 bits).
 * This is a plain alias of `number`, so the range is a convention and is
 * not enforced by the type system.
 */
export type CodePoint = number;
/**
 * Converts a list of Unicode code points into UTF-16 code units.
 *
 * This is the encoding direction (`CodePoint` to `U16`); the decoding
 * direction is provided by `toCodePointList`. In UTF-16:
 * 1. A code point in the Basic Multilingual Plane (BMP) [0x0000–0xFFFF] is a single `U16`.
 * 2. A code point in the supplementary planes [0x10000–0x10FFFF] is a surrogate pair (two `U16`).
 *
 * NOTE(review): the comment previously attached to this declaration described
 * the decoding direction instead: converting UTF-16 to code points and marking
 * invalid sequences (for example unpaired surrogates) by applying `errorMask`
 * to the resulting code point, which the caller can test with
 * `codePoint & errorMask`. That behaviour belongs to `toCodePointList`.
 * How this function treats out-of-range or error-masked code points is not
 * visible from the declaration — confirm against the implementation.
 *
 * @param input - A list of Unicode code points (`CodePoint`) to encode.
 * @returns A `Thunk<U16>` producing the UTF-16 code units
 * (presumably a lazily evaluated list — confirm against the list module).
 * @example
 *
 * ```ts
 * const codePoints: List<CodePoint> = [
 *   0x0041,  // 'A' (BMP, single U16)
 *   0x1F600, // 😀 (supplementary plane, surrogate pair)
 * ]
 *
 * // Expected code units: 0x0041, 0xD83D, 0xDE00
 * const utf16 = fromCodePointList(codePoints)
 * ```
 */
export declare const fromCodePointList: (input: List<CodePoint>) => Thunk<U16>;
/**
 * Converts a list of UTF-16 code units (`U16`) to a list of Unicode code points (`CodePoint`).
 *
 * The input may contain code units from the Basic Multilingual Plane (BMP),
 * which map to one code point each, and surrogate pairs, which are combined
 * into a single supplementary-plane code point. The function also handles
 * EOF (`null`).
 *
 * NOTE(review): elsewhere in this file the decoding is described as marking
 * invalid sequences (such as an unpaired high or low surrogate) by applying
 * `errorMask` to the resulting code point, so callers can detect them with
 * `codePoint & errorMask`. `errorMask` is not declared in this view — confirm
 * its value and export location against the implementation.
 *
 * @param input - A list of UTF-16 code units (`U16`), possibly containing surrogate pairs.
 * @returns A list of Unicode code points (`CodePoint`), one for each valid code unit or surrogate pair.
 *
 * @example
 *
 * ```ts
 * const utf16List: List<U16> = [0x0041, 0xD83D, 0xDE00] // 'A' and 😀 (surrogate pair)
 * const codePoints = toCodePointList(utf16List)
 * ```
 */
export declare const toCodePointList: (input: List<U16>) => List<CodePoint>;
/**
 * Converts a string to a list of UTF-16 code units (`U16`).
 *
 * Each character of the input string contributes its UTF-16 code unit(s):
 * a character in the Basic Multilingual Plane (BMP) produces a single `U16`,
 * while a supplementary-plane character (one that requires a surrogate pair)
 * produces two `U16` values.
 *
 * @param s - The input string to convert to UTF-16 code units.
 * @returns A list of UTF-16 code units (`U16`) representing the string.
 *
 * @example
 *
 * ```js
 * const inputString = "Hello, 😀"
 * const utf16List = stringToList(inputString)
 * ```
 */
export declare const stringToList: (s: string) => List<U16>;
/**
 * Converts a string to a list of Unicode code points (`CodePoint`).
 *
 * The string is first converted to UTF-16 code units with `stringToList`,
 * and those code units are then decoded into code points with
 * `toCodePointList`. This makes it suitable for text that contains
 * supplementary characters, which UTF-16 represents as surrogate pairs.
 *
 * @param input - The input string to convert.
 * @returns A list of Unicode code points (`CodePoint`) corresponding to the characters in the string.
 *
 * @example
 *
 * ```js
 * const inputString = "Hello, 😀"
 * const codePoints = stringToCodePointList(inputString)
 * ```
 */
export declare const stringToCodePointList: (input: string) => List<CodePoint>;
/**
 * Converts a list of UTF-16 code units (`U16`) to a string.
 *
 * Each code unit is mapped back to its character using `String.fromCharCode`,
 * and the resulting characters are concatenated to form the final string.
 *
 * @param input - A list of UTF-16 code units (`U16`).
 * @returns A string representing the characters encoded by the input UTF-16 code units.
 *
 * @example
 *
 * ```ts
 * const utf16List: List<U16> = [0x0041, 0x0042, 0x0043] // 'ABC'
 * const outputString = listToString(utf16List)
 * ```
 */
export declare const listToString: (input: List<U16>) => string;
/**
 * Converts a list of Unicode code points (`CodePoint`) to a string.
 *
 * The code points are first encoded as UTF-16 code units with
 * `fromCodePointList`, and the string is then rebuilt from those code units
 * with `listToString`.
 *
 * @param input - A list of Unicode code points (`CodePoint`).
 * @returns A string representing the characters encoded by the input code points.
 *
 * @example
 *
 * ```ts
 * const codePoints: List<CodePoint> = [0x48, 0x65, 0x6C, 0x6C, 0x6F] // 'Hello'
 * const outputString = codePointListToString(codePoints)
 * ```
 */
export declare const codePointListToString: (input: List<CodePoint>) => string;