UNPKG

@mastra/rag

Version:

The Retrieval-Augmented Generation (RAG) module contains document processing and embedding utilities.

46 lines 1.55 kB
import type { TiktokenModel, TiktokenEncoding } from 'js-tiktoken';
import { TextTransformer } from './text';

/**
 * Either an explicit set of special-token strings or the literal `'all'`.
 * Used for both the allowed and the disallowed special-token settings below.
 */
type SpecialTokens = Set<string> | 'all';

/**
 * Minimal tokenizer contract consumed by {@link splitTextOnTokens}.
 *
 * NOTE(review): the field meanings below are inferred from their names only;
 * the implementation is not part of this declaration file — confirm there.
 */
interface Tokenizer {
  /** Presumably the number of tokens shared by neighbouring chunks — TODO confirm. */
  overlap: number;
  /** Presumably the maximum number of tokens per chunk — TODO confirm. */
  tokensPerChunk: number;
  /** Converts a list of token ids back into a string. */
  decode: (tokens: number[]) => string;
  /** Converts a string into a list of token ids. */
  encode: (text: string) => number[];
}

/** Options accepted by the {@link TokenTransformer} constructor. All fields are optional. */
type TokenTransformerOptions = {
  size?: number;
  overlap?: number;
  lengthFunction?: (text: string) => number;
  keepSeparator?: boolean | 'start' | 'end';
  addStartIndex?: boolean;
  stripWhitespace?: boolean;
};

/** Options accepted by {@link TokenTransformer.fromTikToken}. All fields are optional. */
type FromTikTokenOptions = {
  size?: number;
  overlap?: number;
  allowedSpecial?: SpecialTokens;
  disallowedSpecial?: SpecialTokens;
};

/**
 * Splits `text` into an array of strings using the supplied `tokenizer`.
 *
 * @param text - The input string to split.
 * @param tokenizer - Encode/decode functions plus chunking parameters.
 * @returns The resulting string chunks.
 */
export declare function splitTextOnTokens({
  text,
  tokenizer,
}: {
  text: string;
  tokenizer: Tokenizer;
}): string[];

/**
 * A {@link TextTransformer} subclass configured with a tiktoken encoding
 * (and optionally a model name) plus special-token settings.
 *
 * NOTE(review): how the private fields are used is not visible from this
 * declaration file.
 */
export declare class TokenTransformer extends TextTransformer {
  private tokenizer;
  private allowedSpecial;
  private disallowedSpecial;

  /**
   * @param encodingName - Required tiktoken encoding name.
   * @param modelName - Optional tiktoken model name.
   * @param allowedSpecial - Optional set of special tokens, or `'all'`.
   * @param disallowedSpecial - Optional set of special tokens, or `'all'`.
   * @param options - Required options object; each of its fields is optional.
   */
  constructor({
    encodingName,
    modelName,
    allowedSpecial,
    disallowedSpecial,
    options,
  }: {
    encodingName: TiktokenEncoding;
    modelName?: TiktokenModel;
    allowedSpecial?: SpecialTokens;
    disallowedSpecial?: SpecialTokens;
    options: TokenTransformerOptions;
  });

  /**
   * Splits `text` into an array of strings.
   *
   * @param text - The input string to split.
   * @returns The resulting string chunks.
   */
  splitText({ text }: { text: string }): string[];

  /**
   * Static factory returning a {@link TokenTransformer}.
   *
   * Unlike the constructor, `encodingName` and `options` are both optional
   * here, and the special-token settings are passed inside `options`.
   */
  static fromTikToken({
    encodingName,
    modelName,
    options,
  }: {
    encodingName?: TiktokenEncoding;
    modelName?: TiktokenModel;
    options?: FromTikTokenOptions;
  }): TokenTransformer;
}

export {};
//# sourceMappingURL=token.d.ts.map