llm-text-splitter
Version:
A super simple text splitter for RAG applications
55 lines (54 loc) • 1.5 kB
TypeScript
/**
* Names of the built-in splitting strategies.
* This is a union of string literal types (not a TypeScript `enum`), so callers
* pass the plain strings `'sentence'`, `'paragraph'`, or `'markdown'`.
*/
export type SplitterType = 'sentence' | 'paragraph' | 'markdown';
/**
* Options that control how text is split into chunks. Every field is optional.
* `SplitOptions` is the parameter type of both the `Splitter` constructor and
* `Splitter.split`.
* NOTE(review): this comment previously referred to a `splitter` function; none
* is declared in this file — confirm whether one is exported elsewhere.
*/
export interface SplitOptions {
/**
* Minimum length of a chunk.
* Presumably measured in characters, like `overlap` — TODO confirm.
* @default 0
*/
minLength?: number;
/**
* Maximum length of a chunk.
* Presumably measured in characters, like `overlap` — TODO confirm.
* @default 5000
*/
maxLength?: number;
/**
* Number of characters to overlap between chunks.
* @default 0
*/
overlap?: number;
/**
* The type of splitter to use. Can be 'sentence', 'paragraph', or 'markdown'.
* Ignored when `regex` is provided.
* @default 'sentence'
*/
splitter?: SplitterType;
/**
* Custom regular expression to use for splitting. If provided, `splitter` will be ignored.
* Accepts either a `RegExp` or a string; how a string value is converted into a
* pattern is not visible from this declaration — TODO confirm.
*/
regex?: RegExp | string;
/**
* Whether to remove extra spaces from the chunks.
* @default false
*/
removeExtraSpaces?: boolean;
}
/**
* Text splitter that turns a string into an array of chunk strings.
* Behaviour is configured through `SplitOptions`, which can be supplied to the
* constructor and, optionally, to each `split` call.
*/
export declare class Splitter {
// Private members are listed by name only; this declaration carries no type
// information for them, and they are not part of the public API.
private options;
private static readonly REGEX;
/**
* Creates a splitter.
* @param options Optional splitting options; may be omitted entirely.
*/
constructor(options?: SplitOptions);
private findBreakPoint;
private getOverlapText;
private splitChunk;
private handleChunkSize;
private getRegExp;
/**
* Splits a given text into chunks based on the options provided in the constructor.
* @param text The text to split.
* @param options Optional options supplied for this call.
* NOTE(review): how these combine with the constructor options (presumably they
* take precedence for this call) is not visible from this declaration — confirm.
* @returns An array of strings, where each string is a chunk of the original text.
*/
split(text: string, options?: SplitOptions): string[];
}