@keymanapp/kmc-model
Version:
Keyman Developer lexical model compiler
58 lines • 3.1 kB
TypeScript
import { LexicalModelTypes } from '@keymanapp/common-types';
import CasingForm = LexicalModelTypes.CasingForm;
import CasingFunction = LexicalModelTypes.CasingFunction;
/**
* Converts a wordform into an indexable form (a search key). It does this by
* normalizing the letter case of characters INDIVIDUALLY (to disregard
* context-sensitive case transformations), normalizing to NFKD form,
* and removing common diacritical marks.
*
* This is a very speculative implementation that might work with
* your language. We don't guarantee that this will be perfect for your
* language, but it's a start.
*
* This uses String.prototype.normalize() to normalize into NFKD.
* NFKD neutralizes some funky distinctions, e.g., ꬲ, e, e should all be the
* same character; plus, it's an easy way to separate a Latin character from
* its diacritics. Even then, orthographies regularly use code points
* that, under NFKD normalization, do NOT decompose appropriately for your
* language (e.g., SENĆOŦEN, Plains Cree in syllabics).
*
* Use this in early iterations of the model. For a production lexical model,
* you will probably write/generate your own key function, tailored to your
* language. There is a chance the default will work properly out of the box.
*
* @param wordform The wordform to convert into a search key.
* @returns The indexable (key) form of `wordform`.
*/
export declare function defaultSearchTermToKey(wordform: string): string;
/**
* Converts a wordform into an indexable form (a search key), taking a
* caller-supplied casing function. It does this by
* normalizing the letter case of characters INDIVIDUALLY (to disregard
* context-sensitive case transformations), normalizing to NFKD form,
* and removing common diacritical marks.
*
* This is a very speculative implementation that might work with
* your language. We don't guarantee that this will be perfect for your
* language, but it's a start.
*
* This uses String.prototype.normalize() to normalize into NFKD.
* NFKD neutralizes some funky distinctions, e.g., ꬲ, e, e should all be the
* same character; plus, it's an easy way to separate a Latin character from
* its diacritics. Even then, orthographies regularly use code points
* that, under NFKD normalization, do NOT decompose appropriately for your
* language (e.g., SENĆOŦEN, Plains Cree in syllabics).
*
* Use this in early iterations of the model. For a production lexical model,
* you will probably write/generate your own key function, tailored to your
* language. There is a chance the default will work properly out of the box.
*
* @param wordform The wordform to convert into a search key.
* @param applyCasing The casing function to use.
*   NOTE(review): presumably this is what performs the per-character case
*   normalization described above; only the signature is visible in this
*   declaration file, so confirm against the implementation.
* @returns The indexable (key) form of `wordform`.
*/
export declare function defaultCasedSearchTermToKey(wordform: string, applyCasing: CasingFunction): string;
/**
* Specifies default casing behavior for lexical models when `languageUsesCasing` is
* set to true.
*
* This may be overridden as appropriate in model-specific implementations.
*
* @param casing One of 'lower' (lowercased), 'upper' (uppercased), or 'initial'.
*   'initial' is designed to cover cases like sentence-initial & proper noun
*   capitalization in English.
* @param text The text to be modified.
* @returns The resulting text after the requested casing has been applied.
*/
export declare function defaultApplyCasing(casing: CasingForm, text: string): string;
//# sourceMappingURL=model-defaults.d.ts.map