UNPKG

@keymanapp/common-types

Version:

Keyman Developer keyboard file types

212 lines 7.5 kB
/**
 * Utilities for transform and marker processing.
 *
 * This is a declaration file: it describes the public shape of the marker,
 * variable and element pattern helpers, but contains no implementations.
 */
/** For use with markers: means an ordering of the items can be determined. */
export interface OrderedStringList {
    /** @returns the ordering of an item (0..), or -1 if not found */
    getItemOrder(item: string): number;
}
/**
 * Class for helping with markers.
 *
 * All members are static; the class is used as a collection of constants
 * and helper functions for marker references such as `\m{a}` and `\m{.}`.
 */
export declare class MarkerParser {
    /**
     * A marker id has the same constraint as a key id. TODO-LDML: Needs to be reflected in the spec
     */
    static readonly ID: RegExp;
    /**
     * Special marker reference referring to any marker
     */
    static readonly ANY_MARKER = "\\m{.}";
    /**
     * id of the 'any' marker
     */
    static readonly ANY_MARKER_ID = ".";
    /**
     * Marker sentinel as a string - U+FFFF
     */
    static readonly SENTINEL: string;
    /** Marker sentinel as a regex match */
    static readonly SENTINEL_MATCH: string;
    /**
     * Marker code as a string - U+0008
     */
    static readonly MARKER_CODE: string;
    /** Marker code as a regex match */
    static readonly MARKER_CODE_MATCH: string;
    /** Minimum ID (trailing code unit) */
    static readonly MIN_MARKER_INDEX = 1;
    /** Index meaning 'any marker' == `\m{.}`. 55295 is 0xD7FF. */
    static readonly ANY_MARKER_INDEX = 55295;
    /** Maximum usable marker index */
    static readonly MAX_MARKER_INDEX: number;
    /** Max count of markers */
    static readonly MAX_MARKER_COUNT: number;
    private static anyMarkerMatch;
    /** Expression that matches any marker */
    static readonly ANY_MARKER_MATCH: string;
    /**
     * Pattern for matching a marker reference, OR the special marker \m{.}
     */
    static readonly REFERENCE: RegExp;
    /**
     * Pattern for matching a broken marker reference (assuming REFERENCE was not matched)
     */
    static readonly BROKEN_REFERENCE: RegExp;
    /**
     * Parse a string into marker references.
     * @param str input string such as "\m{a} … \m{.}"
     * @returns `[]` or an array of all markers referenced
     */
    static allReferences(str: string): string[];
    /**
     * Parse a string for broken marker references.
     * @param str input string such as "\m{a} … \m{.}"
     * @returns `[]` or an array of all broken markers referenced
     */
    static allBrokenReferences(str: string): string[];
    private static markerCodeToString;
    /**
     * @param n marker number
     * @param forMatch presumably selects the regex-match form, as for the
     *                 other `forMatch` parameters in this class — TODO confirm
     * @returns string for marker #n
     */
    static markerOutput(n: number, forMatch?: boolean): string;
    /**
     * @param s input string
     * @param markers optional ordered list; presumably used to look up each
     *                marker's index via `getItemOrder` — TODO confirm
     * @param forMatch presumably true if the result is used as a regex — TODO confirm
     * @returns all marker strings as sentinel values
     */
    static toSentinelString(s: string, markers?: OrderedStringList, forMatch?: boolean): string;
    /**
     * NFD a string, respecting markers.
     * @param s input string
     * @param forMatch true if regex, false if individual
     * @returns the normalized string
     */
    static nfd_markers(s: string, forMatch?: boolean): string;
    /**
     * NFD a safe subset of a string, respecting markers
     * @param s input string
     * @param map output array of marker chars
     * @param forMatch true if used for regexes
     * @returns the updated string
     */
    static nfd_markers_segment(s: string, map: MarkerMap, forMatch?: boolean): string;
    /**
     * Return the string s but with a marker sequence before it.
     * @param s input string
     * @param marker marker number to prepend
     * @param forMatch presumably true if regex format is wanted — TODO confirm
     */
    static prepend_marker(s: string, marker: number, forMatch?: boolean): string;
    /**
     * Add back all markers in the map to the string
     * @param s input string
     * @param map the marker map.
     *            NOTE(review): the original comment labelled this "output",
     *            but the summary says the markers are read from the map —
     *            confirm whether the map is also modified (e.g. `processed`).
     * @param forMatch if true, use regex format
     * @returns a string; presumably `s` with the markers re-inserted — TODO confirm
     */
    static add_back_markers(s: string, map: MarkerMap, forMatch?: boolean): string;
    /**
     * Remove (and parse) markers from a string
     * @param s input string
     * @param map output map containing marker locations
     * @param forMatch true if regex
     * @returns the original string, without any markers
     */
    static remove_markers(s: string, map: MarkerMap, forMatch?: boolean): string;
    /**
     * analyze the string to see if it begins with a marker
     * @param s input string
     * @param forMatch true if regex
     * @returns parsed marker details
     */
    static parse_next_marker(s: string, forMatch?: boolean): MarkerResult;
}
/** special noncharacter value denoting end of string */
export declare const MARKER_BEFORE_EOT = "\uFFFE";
/** A single entry in a MarkerMap. All fields are optional. */
export interface MarkerEntry {
    /** code point 'glued' to, or MARKER_BEFORE_EOT */
    ch?: string;
    /** marker number, 1-based */
    marker?: number;
    /** true if processed */
    processed?: boolean;
    /** true if the end of the entries */
    end?: boolean;
}
/** list of marker entries, from remove_markers */
export type MarkerMap = Array<MarkerEntry>;
/** return type from parse_next_marker */
export interface MarkerResult {
    /** presumably the parsed marker number, when a marker was found — TODO confirm */
    marker?: number;
    /** presumably the matched marker text — TODO confirm */
    match?: string;
}
/**
 * Class for helping with variable references: string references `$(str)`,
 * set references `$[set]`, and their capture / mapped forms.
 *
 * NOTE(review): the original comment here read "Class for helping with
 * markers", apparently copied from MarkerParser.
 */
export declare class VariableParser {
    /**
     * A marker id has the same constraint as a key id. TODO-LDML: Needs to be reflected in the spec
     *
     * NOTE(review): this text matches MarkerParser.ID word for word;
     * presumably "variable id" was intended here — confirm.
     */
    static readonly ID: RegExp;
    /**
     * Pattern for matching a string reference `$(str)`
     */
    static readonly STRING_REFERENCE: RegExp;
    /**
     * Pattern for matching a set reference `$[set]`
     */
    static readonly SET_REFERENCE: RegExp;
    /**
     * Pattern for matching a capture set reference `($[set])`
     */
    static readonly CAPTURE_SET_REFERENCE: RegExp;
    /**
     * `$[1:variable]`
     * This regex matches the whole string.
     */
    static readonly MAPPED_SET_REFERENCE: RegExp;
    /**
     * Parse a string into string references.
     * @param str input string
     * @returns `[]` or an array of all string references referenced
     */
    static allStringReferences(str: string): string[];
    /**
     * Parse a string into set references.
     * @param str input string
     * @returns `[]` or an array of all set references referenced.
     *          NOTE(review): the original text said "string references",
     *          apparently copied from allStringReferences — confirm.
     */
    static allSetReferences(str: string): string[];
    /**
     * Split an input string into a proper set
     * @param str input string
     * @returns an array of strings; presumably the individual members of
     *          the set — TODO confirm against implementation
     */
    static setSplitter(str: string): string[];
}
/**
 * Segment types, for ElementParser.segment().
 * Each member's value is a one-character string tag.
 */
export declare enum ElementType {
    /** presumably a single Unicode codepoint — TODO confirm */
    codepoint = ".",
    /** presumably an escaped char, i.e. a segment starting with a backslash — TODO confirm */
    escaped = "\\",
    /** presumably a UnicodeSet, i.e. a segment starting with `[` — TODO confirm */
    uset = "[",
    /** presumably a plain string segment — TODO confirm */
    string = "*"
}
/** one portion of a segmented element string */
export declare class ElementSegment {
    /** the string in the segment */
    segment: string;
    /** type of this segment */
    readonly type: ElementType;
    /**
     * @param segment the string in the segment
     * @param type type of segment. Will be calculated if not provided.
     */
    constructor(segment: string, type?: ElementType);
    /** unescaped format */
    get unescaped(): string;
}
/** Class for helping with Element strings (i.e. reorder) */
export declare class ElementParser {
    /**
     * Matches any complex UnicodeSet that would otherwise be misinterpreted
     * by `MATCH_ELEMENT_SEGMENTS` due to nested `[]`'s.
     * For example, `[[a-z]-[aeiou]]` could be
     * mis-segmented into `[[a-z]`, `-`, `[aeiou]`, `]`
     */
    static readonly MATCH_NESTED_SQUARE_BRACKETS: RegExp;
    /** Match (segment) UnicodeSets OR hex escapes OR single Unicode codepoints */
    static readonly MATCH_ELEMENT_SEGMENTS: RegExp;
    /** Does it start with a UnicodeSet? Used to test the segments. */
    static readonly MATCH_USET: RegExp;
    /** Does it start with an escaped char? Used to test the segments. */
    static readonly MATCH_ESCAPED: RegExp;
    /**
     * Split a string into ElementSegments
     * @param str input element string
     * @returns the segments, as ElementSegment objects
     */
    static segment(str: string): ElementSegment[];
}
//# sourceMappingURL=pattern-parser.d.ts.map