
@keymanapp/kmc-model


Keyman Developer lexical model compiler

import { LexicalModelTypes } from '@keymanapp/common-types';
import CasingForm = LexicalModelTypes.CasingForm;
import CasingFunction = LexicalModelTypes.CasingFunction;
/**
 * Converts wordforms into an indexable form. It does this by
 * normalizing the letter case of characters INDIVIDUALLY (to disregard
 * context-sensitive case transformations), normalizing to NFKD form,
 * and removing common diacritical marks.
 *
 * This is a very speculative implementation that might work with
 * your language. We don't guarantee that this will be perfect for your
 * language, but it's a start.
 *
 * This uses String.prototype.normalize() to convert the string into NFKD form.
 * NFKD neutralizes some funky distinctions, e.g., ꬲ, e, e should all be the
 * same character; plus, it's an easy way to separate a Latin character from
 * its diacritics. Even then, orthographies regularly use code points
 * that, under NFKD normalization, do NOT decompose appropriately for your
 * language (e.g., SENĆOŦEN, Plains Cree in syllabics).
 *
 * Use this in early iterations of the model. For a production lexical model,
 * you will probably write/generate your own key function, tailored to your
 * language. There is a chance the default will work properly out of the box.
 */
export declare function defaultSearchTermToKey(wordform: string): string;
/**
 * Converts wordforms into an indexable form. It does this by
 * normalizing the letter case of characters INDIVIDUALLY (to disregard
 * context-sensitive case transformations), normalizing to NFKD form,
 * and removing common diacritical marks.
 *
 * This is a very speculative implementation that might work with
 * your language. We don't guarantee that this will be perfect for your
 * language, but it's a start.
 *
 * This uses String.prototype.normalize() to convert the string into NFKD form.
 * NFKD neutralizes some funky distinctions, e.g., ꬲ, e, e should all be the
 * same character; plus, it's an easy way to separate a Latin character from
 * its diacritics. Even then, orthographies regularly use code points
 * that, under NFKD normalization, do NOT decompose appropriately for your
 * language (e.g., SENĆOŦEN, Plains Cree in syllabics).
 *
 * Use this in early iterations of the model. For a production lexical model,
 * you will probably write/generate your own key function, tailored to your
 * language. There is a chance the default will work properly out of the box.
 */
export declare function defaultCasedSearchTermToKey(wordform: string, applyCasing: CasingFunction): string;
/**
 * Specifies default casing behavior for lexical models when `languageUsesCasing` is
 * set to true.
 * @param casing One of 'lower' (lowercased), 'upper' (uppercased), or 'initial'.
 *
 * 'initial' is designed to cover cases like sentence-initial & proper-noun
 * capitalization in English. This may be overridden as appropriate in
 * model-specific implementations.
 * @param text The text to be modified.
 */
export declare function defaultApplyCasing(casing: CasingForm, text: string): string;
//# sourceMappingURL=model-defaults.d.ts.map
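
For illustration, a key function along the lines these comments describe might look like the sketch below. This is a hand-written example, not the package's actual implementation; the helper name mySearchTermToKey and the combining-mark range U+0300–U+036F are assumptions made for the example. In a real lexical model you would typically supply such a function as searchTermToKey in the model definition source.

// A minimal sketch of the indexing behavior described above: case-fold each
// character individually, decompose to NFKD, then strip common combining marks.
// Hypothetical helper, not an export of @keymanapp/kmc-model.
const mySearchTermToKey = (wordform: string): string =>
  Array.from(wordform)
    .map(c => c.toLowerCase())        // per-character case folding, ignoring context
    .join('')
    .normalize('NFKD')                // separate base letters from their diacritics
    .replace(/[\u0300-\u036f]/g, ''); // drop common combining diacritical marks

// Example: both spellings map to the same lookup key.
console.log(mySearchTermToKey('Naïve CAFÉ') === mySearchTermToKey('naive cafe')); // true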