UNPKG

@robypag/langchain-splitter

Version:

A small wrapper module that simplifies the tokenization of files and buffers using LangChain

14 lines (11 loc) 787 B
import { Document } from 'langchain/document';
import { Readable } from 'node:stream';

/**
 * Shape of the items that `tokenizeFile` and `tokenizeFromBufferOrString`
 * resolve to.
 */
interface DocumentChunk {
  /** Optional identifier; may be absent or explicitly `undefined`. */
  id?: string | undefined;
  /** Arbitrary string-keyed metadata associated with the chunk. */
  metadata: Record<string, any>;
  /** String content of the chunk. */
  content: string;
}

/**
 * Signature of a function that receives a file path, a chunk size, a chunk
 * overlap and an optional options bag, and resolves to an array of langchain
 * `Document` objects.
 *
 * NOTE(review): this type lists `chunkSize` before `chunkOverlap`, whereas
 * `tokenizeFile` and `tokenizeFromBufferOrString` list `chunkOverlap` before
 * `chunkSize`. Confirm against the implementation which order is intended.
 */
type TokenizeFunction = (
  filePath: string,
  chunkSize: number,
  chunkOverlap: number,
  options?: Record<string, any>,
) => Promise<Document[]>;

/**
 * Splits the file at `filePath` into chunks.
 *
 * @param filePath - Path of the file to process.
 * @param chunkOverlap - Optional chunk overlap. Presumably a default is
 *   applied by the implementation when omitted; confirm there.
 * @param chunkSize - Optional chunk size. Presumably a default is applied by
 *   the implementation when omitted; confirm there.
 * @param options - Optional string-keyed options bag; its accepted keys are
 *   not visible from this declaration.
 * @returns A promise resolving to the resulting `DocumentChunk` array.
 */
declare function tokenizeFile(
  filePath: string,
  chunkOverlap?: number,
  chunkSize?: number,
  options?: Record<string, any>,
): Promise<DocumentChunk[]>;

/**
 * Splits in-memory or streamed content into chunks.
 *
 * @param content - The data to process: a `Buffer`, a string, or a `Readable`
 *   stream.
 * @param extension - File extension associated with `content`. Presumably
 *   used to decide how the content is parsed; confirm against the
 *   implementation.
 * @param chunkOverlap - Optional chunk overlap.
 * @param chunkSize - Optional chunk size.
 * @param options - Optional string-keyed options bag; its accepted keys are
 *   not visible from this declaration.
 * @returns A promise resolving to the resulting `DocumentChunk` array.
 */
declare function tokenizeFromBufferOrString(
  content: Buffer | string | Readable,
  extension: string,
  chunkOverlap?: number,
  chunkSize?: number,
  options?: Record<string, any>,
): Promise<DocumentChunk[]>;

export {
  type DocumentChunk,
  type TokenizeFunction,
  tokenizeFile,
  tokenizeFromBufferOrString,
};