@robypag/langchain-splitter
Version:
A small wrapper module that simplifies tokenizing files and buffers with LangChain
14 lines (11 loc) • 787 B
TypeScript
import { Document } from 'langchain/document';
import { Readable } from 'node:stream';
/**
 * Plain-object shape of a single chunk. It is the element type of the arrays
 * that `tokenizeFile` and `tokenizeFromBufferOrString` are declared to resolve to.
 */
interface DocumentChunk {
/** Optional identifier; the property may be omitted or explicitly set to `undefined`. */
id?: string | undefined;
/** Free-form key/value metadata; the values are untyped (`any`), so narrow before use. */
metadata: Record<string, any>;
/** Chunk body as a string (presumably the split text — confirm against the implementation). */
content: string;
}
/**
 * Function type that takes a file path, a chunk size, a chunk overlap and an
 * optional options bag, and resolves to an array of langchain `Document`s.
 *
 * NOTE(review): `chunkSize` precedes `chunkOverlap` here, while `tokenizeFile`
 * and `tokenizeFromBufferOrString` declare `chunkOverlap` first. Both are
 * `number`, so swapping them by accident type-checks — confirm which order the
 * implementation actually uses.
 * NOTE(review): the unit of `chunkSize` / `chunkOverlap` (characters vs. tokens)
 * is not stated by this declaration — confirm.
 */
type TokenizeFunction = (filePath: string, chunkSize: number, chunkOverlap: number, options?: Record<string, any>) => Promise<Document[]>;
/**
 * Ambient declaration: takes a file path and resolves to an array of
 * `DocumentChunk` objects.
 *
 * @param filePath - Path of the file to process.
 * @param chunkOverlap - Optional overlap setting; the default is not visible in
 *   this declaration.
 * @param chunkSize - Optional size setting; the default is not visible in this
 *   declaration.
 * @param options - Optional untyped options bag (presumably forwarded to the
 *   underlying loader/splitter — confirm against the implementation).
 * @returns A promise of the resulting `DocumentChunk[]`.
 *
 * NOTE(review): the positional order here is `chunkOverlap, chunkSize`, the
 * reverse of the `TokenizeFunction` type in this file. Callers passing both
 * numbers positionally should double-check the order.
 */
declare function tokenizeFile(filePath: string, chunkOverlap?: number, chunkSize?: number, options?: Record<string, any>): Promise<DocumentChunk[]>;
/**
 * Ambient declaration: takes in-memory or streamed content (a `Buffer`, a
 * `string`, or a Node `Readable`) and resolves to an array of `DocumentChunk`
 * objects.
 *
 * @param content - The data to process, as a `Buffer`, `string`, or `Readable`.
 * @param extension - A string describing the content's format (presumably a
 *   file extension used to pick the loader; whether a leading dot is expected
 *   is not visible here — confirm).
 * @param chunkOverlap - Optional overlap setting; the default is not visible in
 *   this declaration.
 * @param chunkSize - Optional size setting; the default is not visible in this
 *   declaration.
 * @param options - Optional untyped options bag (presumably forwarded to the
 *   underlying loader/splitter — confirm against the implementation).
 * @returns A promise of the resulting `DocumentChunk[]`.
 *
 * NOTE(review): the positional order here is `chunkOverlap, chunkSize`, the
 * reverse of the `TokenizeFunction` type in this file.
 */
declare function tokenizeFromBufferOrString(content: Buffer | string | Readable, extension: string, chunkOverlap?: number, chunkSize?: number, options?: Record<string, any>): Promise<DocumentChunk[]>;
export { type DocumentChunk, type TokenizeFunction, tokenizeFile, tokenizeFromBufferOrString };