@keymanapp/common-types
Version:
Keyman Developer keyboard file types
212 lines • 7.5 kB
TypeScript
/**
* Utilities for transform and marker processing
*/
/**
* A list of strings in which the position (ordering) of each item can be determined.
* For use with markers: accepted as the optional `markers` argument of
* `MarkerParser.toSentinelString()`.
*/
export interface OrderedStringList {
/**
* @param item the string to look up
* @returns the ordering of an item (0..), or -1 if not found
*/
getItemOrder(item: string): number;
}
/**
* Class for helping with markers.
* Exposes (as static members only) the constants and patterns for marker
* references such as `\m{a}` and `\m{.}`, plus helpers that convert between
* marker references, sentinel-encoded strings, and marker maps.
*/
export declare class MarkerParser {
/**
* A marker id has the same constraint as a key id. TODO-LDML: Needs to be reflected in the spec
*/
static readonly ID: RegExp;
/**
* Special marker reference referring to any marker: `\m{.}`
*/
static readonly ANY_MARKER = "\\m{.}";
/**
* id of the 'any' marker, i.e. the part between the braces of `\m{.}`
*/
static readonly ANY_MARKER_ID = ".";
/**
* Marker sentinel as a string - U+FFFF
*/
static readonly SENTINEL: string;
/** Marker sentinel as a regex match (presumably an escaped form for use in a pattern - confirm) */
static readonly SENTINEL_MATCH: string;
/**
* Marker code as a string - U+0008
*/
static readonly MARKER_CODE: string;
/** Marker code as a regex match (presumably an escaped form for use in a pattern - confirm) */
static readonly MARKER_CODE_MATCH: string;
/** Minimum ID (trailing code unit) */
static readonly MIN_MARKER_INDEX = 1;
/** Index meaning 'any marker' == `\m{.}`. 55295 is 0xD7FF. */
static readonly ANY_MARKER_INDEX = 55295;
/** Maximum usable marker index */
static readonly MAX_MARKER_INDEX: number;
/** Max count of markers */
static readonly MAX_MARKER_COUNT: number;
/** Private helper; presumably used to build ANY_MARKER_MATCH - confirm against the implementation */
private static anyMarkerMatch;
/** Expression that matches any marker */
static readonly ANY_MARKER_MATCH: string;
/**
* Pattern for matching a marker reference, OR the special marker \m{.}
*/
static readonly REFERENCE: RegExp;
/**
* Pattern for matching a broken marker reference (assuming REFERENCE was not matched)
*/
static readonly BROKEN_REFERENCE: RegExp;
/**
* Parse a string into marker references
* @param str input string such as "\m{a} … \m{.}"
* @returns `[]` or an array of all markers referenced
*/
static allReferences(str: string): string[];
/**
* Parse a string for broken marker references
* @param str input string such as "\m{a} … \m{.}"
* @returns `[]` or an array of all broken markers referenced
*/
static allBrokenReferences(str: string): string[];
/** Private helper; signature not visible in this declaration file */
private static markerCodeToString;
/**
* @param n marker number
* @param forMatch presumably true to produce the regex form, as with the other
* `forMatch` parameters of this class - confirm
* @returns string for marker #n
*/
static markerOutput(n: number, forMatch?: boolean): string;
/**
* @param s input string
* @param markers optional ordered list; presumably used to look up the index of
* each referenced marker id - confirm against the implementation
* @param forMatch presumably true if the result is used as a regex - confirm
* @returns all marker strings as sentinel values
*/
static toSentinelString(s: string, markers?: OrderedStringList, forMatch?: boolean): string;
/**
* NFD a string, respecting markers.
* @param s input string
* @param forMatch true if regex, false if individual
* @returns the normalized string
*/
static nfd_markers(s: string, forMatch?: boolean): string;
/**
* NFD a safe subset of a string, respecting markers
* @param s input string
* @param map output array of marker chars
* @param forMatch true if used for regexes
* @returns the updated string
*/
static nfd_markers_segment(s: string, map: MarkerMap, forMatch?: boolean): string;
/**
* Return the string s but with a marker sequence before it
* @param s input string
* @param marker marker number to place before `s`
* @param forMatch presumably true to use the regex format - confirm
* @returns `s` preceded by the marker sequence
*/
static prepend_marker(s: string, marker: number, forMatch?: boolean): string;
/**
* Add back all markers in the map to the string
* @param s input string
* @param map output: the marker map
* NOTE(review): labelled "output" in the original comment, although the method
* name suggests the map is read here - confirm against the implementation
* @param forMatch if true, use regex format
* @returns a string; presumably `s` with the markers from `map` reinserted - confirm
*/
static add_back_markers(s: string, map: MarkerMap, forMatch?: boolean): string;
/**
* Remove (and parse) markers from a string
* @param s input string
* @param map output map containing marker locations
* @param forMatch true if regex
* @returns the original string, without any markers
*/
static remove_markers(s: string, map: MarkerMap, forMatch?: boolean): string;
/**
* Analyze the string to see if it begins with a marker
* @param s input string
* @param forMatch true if regex
* @returns parsed marker details
*/
static parse_next_marker(s: string, forMatch?: boolean): MarkerResult;
}
/**
* Special noncharacter value (U+FFFE) denoting end of string.
* May appear as `MarkerEntry.ch` in place of a code point.
*/
export declare const MARKER_BEFORE_EOT = "\uFFFE";
/** One element of a MarkerMap. All fields are optional. */
export interface MarkerEntry {
/** code point 'glued' to, or MARKER_BEFORE_EOT */
ch?: string;
/** marker number, 1-based */
marker?: number;
/** true if processed */
processed?: boolean;
/** true if the end of the entries */
end?: boolean;
}
/**
* List of marker entries (MarkerEntry), from remove_markers.
* Also taken by `MarkerParser.add_back_markers()` and `MarkerParser.nfd_markers_segment()`.
*/
export type MarkerMap = Array<MarkerEntry>;
/** Return type from parse_next_marker. Both fields are optional. */
export interface MarkerResult {
/** presumably the number of the marker found at the start of the string - confirm */
marker?: number;
/** presumably the text that was matched as the marker - confirm */
match?: string;
}
/**
* Class for helping with variables: patterns and helpers for string
* references `$(str)` and set references `$[set]`.
*/
export declare class VariableParser {
/**
* Presumably the pattern for a variable id.
* NOTE(review): the original comment here read "A marker id has the same constraint
* as a key id", apparently copied from MarkerParser - confirm the intended constraint.
* TODO-LDML: Needs to be reflected in the spec
*/
static readonly ID: RegExp;
/**
* Pattern for matching a string reference `$(str)`
*/
static readonly STRING_REFERENCE: RegExp;
/**
* Pattern for matching a set reference `$[set]`
*/
static readonly SET_REFERENCE: RegExp;
/**
* Pattern for matching a capture set reference `($[set])`
*/
static readonly CAPTURE_SET_REFERENCE: RegExp;
/**
* Pattern for matching a mapped set reference `$[1:variable]`
* This regex matches the whole string.
*/
static readonly MAPPED_SET_REFERENCE: RegExp;
/**
* Parse a string into string references
* @param str input string
* @returns `[]` or an array of all string references referenced
*/
static allStringReferences(str: string): string[];
/**
* Parse a string into set references
* @param str input string
* @returns `[]` or an array of all set references referenced
*/
static allSetReferences(str: string): string[];
/**
* Split an input string into a proper set
* @param str input string
* @returns an array of strings; presumably the individual members of the set - confirm
*/
static setSplitter(str: string): string[];
}
/**
* Kind of an ElementSegment; for ElementParser.segment().
* Each member has a one-character string value.
*/
export declare enum ElementType {
/** presumably a single Unicode codepoint - confirm */
codepoint = ".",
/** presumably an escaped char (compare ElementParser.MATCH_ESCAPED) - confirm */
escaped = "\\",
/** presumably a UnicodeSet (compare ElementParser.MATCH_USET) - confirm */
uset = "[",
/** NOTE(review): meaning not evident from this declaration file - confirm */
string = "*"
}
/** One portion of a segmented element string, as returned by ElementParser.segment() */
export declare class ElementSegment {
/** the string in the segment */
segment: string;
/** type of segment; read-only */
readonly type: ElementType;
/**
* @param segment the string in the segment
* @param type type of segment. Will be calculated if not provided.
*/
constructor(segment: string, type?: ElementType);
/** unescaped format */
get unescaped(): string;
}
/** Class for helping with Element strings (i.e. reorder) */
export declare class ElementParser {
/**
* Matches any complex UnicodeSet that would otherwise be misinterpreted
* by `MATCH_ELEMENT_SEGMENTS` due to nested `[]`'s.
* For example, `[[a-z]-[aeiou]]` could be
* mis-segmented into `[[a-z]`, `-`, `[aeiou]`, `]`
*/
static readonly MATCH_NESTED_SQUARE_BRACKETS: RegExp;
/** Match (segment) UnicodeSets OR hex escapes OR single Unicode codepoints */
static readonly MATCH_ELEMENT_SEGMENTS: RegExp;
/** Does it start with a UnicodeSet? Used to test the segments. */
static readonly MATCH_USET: RegExp;
/** Does it start with an escaped char? Used to test the segments. */
static readonly MATCH_ESCAPED: RegExp;
/**
* Split a string into ElementSegments
* @param str input element string
* @returns the segments as an array of ElementSegment
*/
static segment(str: string): ElementSegment[];
}
//# sourceMappingURL=pattern-parser.d.ts.map