UNPKG

llm-text-splitter

Version:

A super simple text splitter for RAG applications

55 lines (54 loc) 1.5 kB
/**
 * Names of the built-in splitting strategies, expressed as a union of
 * string literals.
 */
export type SplitterType = 'sentence' | 'paragraph' | 'markdown';

/**
 * Options that configure how text is split into chunks.
 * Every field is optional; the documented defaults apply when a field is omitted.
 */
export interface SplitOptions {
  /**
   * Minimum length of a chunk.
   * @default 0
   */
  minLength?: number;

  /**
   * Maximum length of a chunk.
   * @default 5000
   */
  maxLength?: number;

  /**
   * Number of characters shared (overlapped) between chunks.
   * @default 0
   */
  overlap?: number;

  /**
   * Which built-in splitter to use: 'sentence', 'paragraph', or 'markdown'.
   * @default 'sentence'
   */
  splitter?: SplitterType;

  /**
   * Custom regular expression used for splitting.
   * When provided, `splitter` is ignored.
   */
  regex?: RegExp | string;

  /**
   * Whether extra spaces are removed from the chunks.
   * @default false
   */
  removeExtraSpaces?: boolean;
}

/**
 * Text splitter configured through {@link SplitOptions}.
 *
 * This is a declaration only: private members are listed without types or
 * signatures, so their behaviour cannot be determined from this file.
 */
export declare class Splitter {
  private options;
  private static readonly REGEX;

  /**
   * @param options Optional splitting configuration.
   */
  constructor(options?: SplitOptions);

  private findBreakPoint;
  private getOverlapText;
  private splitChunk;
  private handleChunkSize;
  private getRegExp;

  /**
   * Splits a given text into chunks based on the options provided in the constructor.
   *
   * @param text The text to split.
   * @param options Optional per-call options.
   *   NOTE(review): presumably these override or extend the constructor
   *   options for this call only; confirm against the implementation.
   * @returns An array of strings, where each string is a chunk of the original text.
   */
  split(text: string, options?: SplitOptions): string[];
}