UNPKG

@botonic/plugin-contentful

Version:

Botonic Plugin Contentful is one of the **[available](https://github.com/hubtype/botonic/tree/master/packages)** plugins for Botonic. **[Contentful](http://www.contentful.com)** is a CMS (Content Management System) that manages content of a great variety…

83 lines (82 loc) 2.98 kB
import { Locale } from './locales';
import { stopWordsFor, tokenizerPerLocale } from './tokens';

/**
 * A stem together with the tokens that must be mapped to it.
 *
 * Both tokens and stem will be converted to the <code>stem</code>.
 * Tokens will be searched case-insensitively.
 */
export declare class StemmingBlackList {
    /** Stem that every blacklisted token is converted to. */
    readonly stem: string;
    /** Tokens that are converted to {@link stem}. */
    readonly tokens: string[];
    /**
     * @param stem stem to which the tokens (and the stem itself) are converted
     * @param tokens tokens covered by this blacklist entry
     */
    constructor(stem: string, tokens: string[]);
    /**
     * Returns a StemmingBlackList derived from this one through `normalizer`.
     *
     * NOTE(review): presumably `normalizer` is applied to the stem and to each
     * token; the implementation is not visible in this declaration file.
     *
     * @param normalizer string-to-string function
     */
    normalize(normalizer: (str: string) => string): StemmingBlackList;
    /**
     * Tells whether `token` is covered by this blacklist entry.
     * Per the class contract, tokens are searched case-insensitively.
     *
     * @param token token to look up
     */
    isBlackListed(token: string): boolean;
}

/**
 * A single word of an utterance: its token, its stem and whether it is a
 * stop word.
 */
export declare class Word {
    /**
     * Lowercase, with i18n characters converted to ascii and after executing
     * Preprocessor.
     */
    readonly token: string;
    /** Lowercase, stemmed. Same as token for stopwords. */
    readonly stem: string;
    /** Whether this word is a stop word. */
    readonly isStopWord: boolean;
    /**
     * @param token lowercase, with i18n characters converted to ascii and after executing Preprocessor
     * @param stem lowercase, stemmed. Same as token for stopwords
     * @param isStopWord optional; the default value is not visible in this
     *   declaration file
     */
    constructor(token: string, stem: string, isStopWord?: boolean);
    /**
     * Builds a single string from `words`.
     *
     * NOTE(review): presumably joins the words' tokens and drops stop words
     * when `withStopwords` is false; the separator is not visible here.
     *
     * @param words words to join
     * @param withStopwords flag controlling how stop words are handled
     */
    static joinedTokens(words: Word[], withStopwords: boolean): string;
    /**
     * Creates a Word from a stop-word token.
     *
     * NOTE(review): presumably the stem equals `token` and `isStopWord` is
     * true (see the constructor's note on stopwords) — confirm against the
     * implementation.
     *
     * @param token stop-word token
     */
    static StopWord(token: string): Word;
}

/**
 * Error subclass. {@link Normalizer.normalize} documents throwing it when the
 * text is empty or only contains separators.
 */
export declare class EmptyTextException extends Error {
    /**
     * @param txt NOTE(review): presumably the offending text; how it is used
     *   in the error message is not visible here
     */
    constructor(txt: string);
}

/**
 * Result type of {@link Normalizer.normalize}: the raw text plus its words
 * and stems.
 */
export declare class NormalizedUtterance {
    /** raw is actually lowercased and trimmed */
    readonly raw: string;
    /** Words of the utterance. */
    readonly words: Word[];
    /** Backing flag for {@link hasOnlyStopWords}; type elided because it is private. */
    private readonly onlyStopWords;
    /** Without stopwords */
    readonly stems: string[];
    /**
     * @param raw raw is actually lowercased and trimmed
     * @param words words of the utterance
     * @param onlyStopWords true iff all tokens are stop words
     */
    constructor(
    raw: string, words: Word[], onlyStopWords?: boolean);
    /** Returns true iff all tokens are stop words (see the constructor). */
    hasOnlyStopWords(): boolean;
    /**
     * Compares this utterance with `other` by their stems.
     *
     * NOTE(review): whether order and duplicates matter is not visible here.
     *
     * @param other utterance to compare against
     */
    hasSameStems(other: NormalizedUtterance): boolean;
    /**
     * Builds a single string from this utterance's words.
     *
     * NOTE(review): presumably delegates to {@link Word.joinedTokens}.
     *
     * @param withStopWords flag controlling how stop words are handled
     */
    joinedTokens(withStopWords: boolean): string;
}

/**
 * Contract for text preprocessors. According to the {@link Normalizer}
 * constructor documentation, the preprocessor is applied before tokenizing.
 */
export declare abstract class Preprocessor {
    /**
     * @param txt text to preprocess
     * @returns the preprocessed text
     */
    abstract preprocess(txt: string): string;
}

/**
 * Preprocessor with the same `preprocess` signature as {@link Preprocessor}.
 * Note that it is not declared with `implements Preprocessor`; it only
 * matches the contract structurally.
 *
 * NOTE(review): the name suggests it returns the text unchanged — confirm.
 */
export declare class NopPreprocessor {
    /**
     * @param txt text to preprocess
     */
    preprocess(txt: string): string;
}

/**
 * Removes dots within acronyms, even if missing last dot,
 * or immediately followed by a different separator
 */
export declare class AcronymPreprocessor implements Preprocessor {
    private static readonly DOT;
    private SEPS_NO_DOT;
    /**
     * @param separators NOTE(review): presumably the characters treated as
     *   word separators; the expected format is not visible here
     */
    constructor(separators: string);
    /**
     * @param txt text to preprocess
     * @returns the text with dots within acronyms removed (see class doc)
     */
    preprocess(txt: string): string;
    private splitWordsAndSeparators;
    private preprocessWord;
}

/**
 * Turns raw text into a {@link NormalizedUtterance} for a given locale.
 */
export declare class Normalizer {
    private readonly tokenizer;
    private readonly separatorsRegex;
    private readonly preprocessor;
    private stopWordsPerLocale;
    private stemmingBlackListPerLocale;
    /**
     * All parameters are optional; their default values are not visible in
     * this declaration file.
     *
     * @param stemmingBlackListPerLocale stemming blacklists keyed by a locale string
     * @param stopWordsForLocale function with the signature of `stopWordsFor`
     * @param tokenizer function with the signature of `tokenizerPerLocale`
     * @param separatorsRegex regular expression for separators
     * @param preprocessor Applied before tokenizing. Applied also to separators
     *   and stem words. Typed as AcronymPreprocessor rather than the abstract
     *   Preprocessor.
     */
    constructor(stemmingBlackListPerLocale?: {
        [locale: string]: StemmingBlackList[];
    }, stopWordsForLocale?: typeof stopWordsFor, tokenizer?: typeof tokenizerPerLocale, separatorsRegex?: RegExp, preprocessor?: AcronymPreprocessor);
    /**
     * @param locale locale of the text
     * @param raw text to normalize
     * @returns the normalized utterance
     * @throws EmptyTextException if the text is empty or only contains separators
     */
    normalize(locale: Locale, raw: string): NormalizedUtterance;
    private normalizeWord;
    private getBlackListStem;
}