@orama/orama
Version:
A complete search engine and RAG pipeline in your browser, server, or edge network with support for full-text, vector, and hybrid search in less than 2kb.
16 lines (15 loc) • 788 B
TypeScript
import type { Optional } from '../../types.js';
import { Stemmer, Tokenizer, DefaultTokenizerConfig } from '../../types.js';
import { Language } from './languages.js';
/**
 * Shape of the tokenizer object returned by `createTokenizer`.
 *
 * Extends the base `Tokenizer` contract (declared in `../../types.js`) with the
 * configuration and state fields listed below. Only the types are visible in
 * this declaration file; notes on how each field is used are marked as
 * assumptions where the implementation would be needed to confirm them.
 */
export interface DefaultTokenizer extends Tokenizer {
/** Language associated with this tokenizer (type comes from `./languages.js`). */
language: Language;
/** Optional stemmer; may be absent. */
stemmer?: Stemmer;
/** Set of strings; presumably property names for which tokenization is skipped — TODO confirm against the implementation. */
tokenizeSkipProperties: Set<string>;
/** Set of strings; presumably property names for which stemming is skipped — TODO confirm against the implementation. */
stemmerSkipProperties: Set<string>;
/** Optional list of stop words; how they are applied is not visible here. */
stopWords?: string[];
/** Boolean flag; presumably controls whether duplicate tokens are kept in the output — TODO confirm. */
allowDuplicates: boolean;
/** String-to-string map; presumably memoizes normalized tokens. The key format is not visible here. */
normalizationCache: Map<string, string>;
/**
 * Method form of token normalization; must be invoked with a `DefaultTokenizer` as `this`.
 *
 * NOTE(review): this signature differs from the standalone `normalizeToken`
 * declaration in this file: here `prop` is `Optional<string>` and `withCache`
 * is a required positional parameter typed `Optional<boolean>`, while the
 * standalone function takes `prop: string` and an optional `withCache?: boolean`.
 * `Optional` is declared in `../../types.js` (presumably `T | undefined` — confirm).
 *
 * @param prop - Property the token belongs to, wrapped in `Optional`.
 * @param token - The token to normalize.
 * @param withCache - Cache flag, wrapped in `Optional`; presumably toggles use of `normalizationCache` — TODO confirm.
 * @returns A string, presumably the normalized token.
 */
normalizeToken(this: DefaultTokenizer, prop: Optional<string>, token: string, withCache: Optional<boolean>): string;
}
/**
 * Standalone token-normalization function (ambient declaration; the
 * implementation is not part of this file).
 *
 * The `this` parameter means the function must be called with a
 * `DefaultTokenizer` as its receiver, e.g. via `.call(tokenizer, ...)` or as a
 * method installed on the tokenizer object.
 *
 * @param prop - Property the token belongs to.
 * @param token - The token to normalize.
 * @param withCache - Optional flag; presumably toggles use of the tokenizer's `normalizationCache` — TODO confirm. The default when omitted is not visible here.
 * @returns A string, presumably the normalized token.
 */
export declare function normalizeToken(this: DefaultTokenizer, prop: string, token: string, withCache?: boolean): string;
/**
 * Creates a `DefaultTokenizer` (ambient declaration; the implementation is not
 * part of this file).
 *
 * @param config - Optional tokenizer configuration. Which defaults apply when
 *   it is omitted, and whether invalid values are rejected, is not visible here.
 * @returns A `DefaultTokenizer` instance.
 */
export declare function createTokenizer(config?: DefaultTokenizerConfig): DefaultTokenizer;