@mastra/rag
Version:
The Retrieval-Augmented Generation (RAG) module contains document processing and embedding utilities.
36 lines • 1.28 kB
TypeScript
import type { TiktokenModel, TiktokenEncoding, Tiktoken } from 'js-tiktoken';
import type { TokenChunkOptions } from '../types.js';
import { TextTransformer } from './text.js';
/**
 * Tokenizer contract accepted by `splitTextOnTokens`: two numeric chunking
 * parameters plus a pair of functions converting between text and numeric
 * tokens.
 *
 * The interface is not exported, so it cannot be imported by name from this
 * module; callers pass any object with a matching shape.
 */
interface Tokenizer {
/** NOTE(review): presumably the number of tokens shared between consecutive chunks — confirm against the implementation. */
overlap: number;
/** NOTE(review): presumably the maximum number of tokens in one chunk — confirm against the implementation. */
tokensPerChunk: number;
/** Converts an array of numeric tokens into a string. */
decode: (tokens: number[]) => string;
/** Converts a string into an array of numeric tokens. */
encode: (text: string) => number[];
}
/**
 * Splits `text` into an array of strings with the help of `tokenizer`.
 *
 * Takes a single object argument with two required properties:
 * - `text`: the input string to split.
 * - `tokenizer`: the encode/decode functions and chunk-size settings to use.
 *
 * Only the signature is declared in this file. NOTE(review): presumably the
 * text is encoded, sliced into windows of `tokensPerChunk` tokens that advance
 * with `overlap` tokens in common, and each window is decoded back to a
 * string — confirm against the implementation.
 *
 * @returns The pieces of `text` as strings.
 */
export declare function splitTextOnTokens({ text, tokenizer }: {
text: string;
tokenizer: Tokenizer;
}): string[];
/**
 * A `TextTransformer` subclass that splits text using a tiktoken tokenizer.
 *
 * Only the public surface is declared here; the types of the private members
 * are erased in this declaration file.
 */
export declare class TokenTransformer extends TextTransformer {
// NOTE(review): presumably the js-tiktoken instance used to encode/decode — type not visible here.
private tokenizer;
// NOTE(review): presumably derived from the `allowedSpecial` constructor argument — confirm.
private allowedArray;
// NOTE(review): presumably derived from the `disallowedSpecial` constructor argument — confirm.
private disallowedArray;
/**
 * Creates a transformer from a single object argument:
 * - `encodingName` (optional): a tiktoken encoding name.
 * - `modelName` (optional): a tiktoken model name.
 * - `tokenizer` (optional): an already-constructed `Tiktoken` instance.
 * - `allowedSpecial` (optional): a set of strings, or the literal `'all'`.
 * - `disallowedSpecial` (optional): a set of strings, or the literal `'all'`.
 * - `options` (required): token chunking options.
 *
 * NOTE(review): how `encodingName`, `modelName` and `tokenizer` are
 * prioritised when more than one is given, and the defaults used when none
 * is given, are not visible from this declaration — check the implementation.
 */
constructor({ encodingName, modelName, tokenizer: existingTokenizer, allowedSpecial, disallowedSpecial, options, }: {
encodingName?: TiktokenEncoding;
modelName?: TiktokenModel;
tokenizer?: Tiktoken;
allowedSpecial?: Set<string> | 'all';
disallowedSpecial?: Set<string> | 'all';
options: TokenChunkOptions;
});
/**
 * Splits the given `text` and returns the resulting strings.
 *
 * NOTE(review): likely overrides a `splitText` on `TextTransformer`; the base
 * class is declared in `./text.js` and is not visible here.
 */
splitText({ text }: {
text: string;
}): string[];
/**
 * Static factory that returns a `TokenTransformer`.
 *
 * Every property of the argument object is optional: `encodingName`,
 * `modelName` and `options`.
 *
 * NOTE(review): `options` is optional here but required by the constructor,
 * so the implementation presumably supplies a default — confirm.
 */
static fromTikToken({ encodingName, modelName, options, }: {
encodingName?: TiktokenEncoding;
modelName?: TiktokenModel;
options?: TokenChunkOptions;
}): TokenTransformer;
}
// Empty export: keeps only the explicitly `export`-ed declarations public, so
// the non-exported `Tokenizer` interface stays private to this module.
export {};
//# sourceMappingURL=token.d.ts.map