vector-chunker
Version:
A flexible text and data chunking library for vector databases and LLMs
26 lines (24 loc) • 593 B
text/typescript
/**
 * Caller-supplied settings for a chunking operation.
 *
 * Every member is optional. Defaults are not defined by this declaration;
 * they are decided by whatever code consumes the options.
 */
export interface ChunkOptions {
  /** Input/output format selector: plain text or JSON. */
  format?: 'text' | 'json';

  /**
   * Requested chunk size.
   * NOTE(review): the unit (characters, bytes, tokens, ...) is not established
   * here — confirm against the chunker implementation.
   */
  chunkSize?: number;

  /**
   * NOTE(review): presumably permits chunks that exceed `chunkSize` instead of
   * rejecting or splitting them — confirm against the implementation.
   */
  allowOversized?: boolean;

  // Options added for vector DB support

  /** Boundary kind on which the input is split. */
  splitOn?: 'sentence' | 'paragraph' | 'word' | 'character';

  /**
   * NOTE(review): presumably how much content adjacent chunks share; unit
   * assumed to match `chunkSize` — confirm.
   */
  overlap?: number;

  /**
   * NOTE(review): presumably controls whether surrounding context is kept
   * alongside each chunk — confirm against the implementation.
   */
  preserveContext?: boolean;

  /** Free-form, string-keyed values supplied by the caller; values are untyped. */
  metadata?: Record<string, any>;
}
/**
 * Descriptive record attached to a single chunk.
 *
 * `id`, `index`, `totalChunks` and `originalSize` are always present; the
 * remaining members are optional.
 */
export interface ChunkMetadata {
  // --- required ---

  /** String identifier of the chunk. */
  id: string;

  /**
   * Numeric index of the chunk.
   * NOTE(review): presumably its position among sibling chunks; whether it is
   * zero- or one-based is not established here — confirm.
   */
  index: number;

  /** NOTE(review): presumably the number of chunks produced from the same input — confirm. */
  totalChunks: number;

  /**
   * Size of the original content.
   * NOTE(review): what is measured (whole input vs. this chunk) and the unit
   * are not established here — confirm.
   */
  originalSize: number;

  // --- optional ---

  /** NOTE(review): presumably the identifier of the source the chunk came from — confirm. */
  parentId?: string;

  /**
   * String referring to the preceding chunk.
   * NOTE(review): whether this holds an id or content is not established here — confirm.
   */
  previousChunk?: string;

  /**
   * String referring to the following chunk.
   * NOTE(review): whether this holds an id or content is not established here — confirm.
   */
  nextChunk?: string;

  /** Optional context string associated with the chunk. */
  context?: string;
}
/**
 * A chunk payload paired with its descriptive metadata.
 *
 * @typeParam T - Type of the payload stored in `content`.
 */
export interface VectorChunk<T> {
  /** Metadata record describing this chunk. */
  metadata: ChunkMetadata;

  /** The chunk payload itself. */
  content: T;
}