@botonic/plugin-contentful
Version:
Botonic Plugin Contentful is one of the **[available](https://github.com/hubtype/botonic/tree/master/packages)** plugins for Botonic. **[Contentful](http://www.contentful.com)** is a CMS (Content Management System) which manages contents of a great variety…
83 lines (82 loc) • 2.98 kB
TypeScript
import { Locale } from './locales';
import { stopWordsFor, tokenizerPerLocale } from './tokens';
/**
* Pairs a <code>stem</code> with the <code>tokens</code> that are converted to it.
* Both tokens and stem will be converted to the <code>stem</code>.
* Tokens will be searched case-insensitively.
*/
export declare class StemmingBlackList {
/** Stem that the listed tokens (and the stem itself) are converted to. */
readonly stem: string;
/** Tokens that are converted to <code>stem</code>. */
readonly tokens: string[];
/**
* @param stem stem for the given tokens
* @param tokens tokens to be converted to <code>stem</code>
*/
constructor(stem: string, tokens: string[]);
/**
* @param normalizer string-to-string function
* @returns a StemmingBlackList; presumably one whose stem and tokens have been
* passed through <code>normalizer</code> (implementation is not visible in this
* declaration, TODO confirm)
*/
normalize(normalizer: (str: string) => string): StemmingBlackList;
/**
* @param token token to look up; per the class comment, tokens are searched
* case-insensitively
* @returns presumably true when <code>token</code> is one of <code>tokens</code>
* (TODO confirm against the implementation)
*/
isBlackListed(token: string): boolean;
}
/**
* A token together with its stem and a flag telling whether it is a stop word.
*/
export declare class Word {
/** See the <code>token</code> constructor parameter. */
readonly token: string;
/** See the <code>stem</code> constructor parameter. */
readonly stem: string;
/** Whether this word is a stop word. */
readonly isStopWord: boolean;
/**
* @param token lowercase, with i18n characters converted to ascii and after executing Preprocessor
* @param stem lowercase, stemmed. Same as token for stopwords
* @param isStopWord optional; the default is not visible in this declaration
* (presumably false, TODO confirm)
*/
constructor(token: string, stem: string, isStopWord?: boolean);
/**
* Builds a single string out of the tokens of <code>words</code>.
* @param words words whose tokens are joined
* @param withStopwords presumably whether stop words are kept in the result;
* the separator is not visible in this declaration (TODO confirm both)
*/
static joinedTokens(words: Word[], withStopwords: boolean): string;
/**
* Creates a Word from a token alone.
* NOTE(review): presumably the result is flagged as a stop word and uses
* <code>token</code> as its stem (the constructor documents that the stem is the
* same as the token for stopwords), TODO confirm.
*/
static StopWord(token: string): Word;
}
/**
* Error documented as thrown by <code>Normalizer.normalize</code> if the text is
* empty or only contains separators.
*/
export declare class EmptyTextException extends Error {
/**
* @param txt presumably the text that was found to be empty; how it is used
* in the error message is not visible in this declaration (TODO confirm)
*/
constructor(txt: string);
}
/**
* Return type of <code>Normalizer.normalize</code>: a text together with the
* words and stems obtained from it.
*/
export declare class NormalizedUtterance {
/** raw is actually lowercased and trimmed */
readonly raw: string;
/**
* Words of the utterance.
* NOTE(review): presumably includes stop words, since only <code>stems</code>
* is documented as excluding them (TODO confirm).
*/
readonly words: Word[];
/** See the <code>onlyStopWords</code> constructor parameter. */
private readonly onlyStopWords;
/** Without stopwords */
readonly stems: string[];
/**
* @param raw raw is actually lowercased and trimmed
* @param words words of the utterance
* @param onlyStopWords true iff all tokens are stop words. Optional; the
* default is not visible in this declaration.
*/
constructor(
/** raw is actually lowercased and trimmed */
raw: string, words: Word[], onlyStopWords?: boolean);
/**
* @returns presumably the <code>onlyStopWords</code> flag, i.e. true iff all
* tokens are stop words (TODO confirm)
*/
hasOnlyStopWords(): boolean;
/**
* Compares the stems of this utterance with those of <code>other</code>.
* NOTE(review): whether order and duplicates matter is not visible in this
* declaration.
*/
hasSameStems(other: NormalizedUtterance): boolean;
/**
* Builds a single string out of the tokens of this utterance.
* @param withStopWords presumably whether stop words are kept in the result
* (TODO confirm)
*/
joinedTokens(withStopWords: boolean): string;
}
/**
* Contract for a string-to-string text transformation.
* According to the comment on the constructor of <code>Normalizer</code>, the
* preprocessor is applied before tokenizing.
*/
export declare abstract class Preprocessor {
/**
* @param txt text to transform
* @returns the transformed text
*/
abstract preprocess(txt: string): string;
}
/**
* Preprocessor which, judging by its name, leaves the text untouched
* (implementation not visible in this declaration, TODO confirm).
* NOTE(review): unlike <code>AcronymPreprocessor</code>, this class is declared
* without an <code>implements Preprocessor</code> clause; it only matches
* <code>Preprocessor</code> structurally.
*/
export declare class NopPreprocessor {
/**
* @param txt text to preprocess
* @returns presumably <code>txt</code> unchanged (TODO confirm)
*/
preprocess(txt: string): string;
}
/**
* Removes dots within acronyms, even if missing last dot,
* or immediately followed by a different separator
*/
export declare class AcronymPreprocessor implements Preprocessor {
/** Value not visible in this declaration; presumably the dot character. */
private static readonly DOT;
/**
* Presumably the separators received in the constructor, excluding the dot
* (TODO confirm against the implementation).
*/
private SEPS_NO_DOT;
/**
* @param separators presumably the characters regarded as word separators
* (TODO confirm the expected format)
*/
constructor(separators: string);
/**
* @param txt text possibly containing acronyms
* @returns the text after removing dots within acronyms, as described in the
* class comment
*/
preprocess(txt: string): string;
/** Private helper; signature and behaviour not visible in this declaration. */
private splitWordsAndSeparators;
/** Private helper; signature and behaviour not visible in this declaration. */
private preprocessWord;
}
/**
* Converts a raw text into a <code>NormalizedUtterance</code> for a given locale.
* Configured through the constructor with stemming blacklists, stop words,
* a tokenizer, a separators regular expression and a preprocessor.
*/
export declare class Normalizer {
/** See the <code>tokenizer</code> constructor parameter. */
private readonly tokenizer;
/** See the <code>separatorsRegex</code> constructor parameter. */
private readonly separatorsRegex;
/** See the <code>preprocessor</code> constructor parameter. */
private readonly preprocessor;
/** Presumably derived from <code>stopWordsForLocale</code> (TODO confirm). */
private stopWordsPerLocale;
/** See the <code>stemmingBlackListPerLocale</code> constructor parameter. */
private stemmingBlackListPerLocale;
/**
* preprocessor: Applied before tokenizing. Applied also to separators and stem words
*
* All parameters are optional; their defaults are not visible in this declaration.
* @param stemmingBlackListPerLocale list of StemmingBlackList per locale string
* @param stopWordsForLocale must have the same type as <code>stopWordsFor</code>
* from './tokens'
* @param tokenizer must have the same type as <code>tokenizerPerLocale</code>
* from './tokens'
* @param separatorsRegex regular expression for separators
* @param preprocessor NOTE(review): typed as the concrete
* <code>AcronymPreprocessor</code>, not as <code>Preprocessor</code>
*/
constructor(stemmingBlackListPerLocale?: {
[locale: string]: StemmingBlackList[];
}, stopWordsForLocale?: typeof stopWordsFor, tokenizer?: typeof tokenizerPerLocale, separatorsRegex?: RegExp, preprocessor?: AcronymPreprocessor);
/**
* @param locale locale used to normalize the text
* @param raw text to normalize
* @returns the normalized utterance
* @throws EmptyTextException if the text is empty or only contains separators
*/
normalize(locale: Locale, raw: string): NormalizedUtterance;
/** Private helper; signature and behaviour not visible in this declaration. */
private normalizeWord;
/** Private helper; signature and behaviour not visible in this declaration. */
private getBlackListStem;
}