@mastra/rag
Version:
The Retrieval-Augmented Generation (RAG) module contains document processing and embedding utilities.
66 lines • 1.93 kB
TypeScript
import type { TiktokenEncoding, TiktokenModel } from 'js-tiktoken';
import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs } from './extractors';
export declare enum Language {
CPP = "cpp",
GO = "go",
JAVA = "java",
KOTLIN = "kotlin",
JS = "js",
TS = "ts",
PHP = "php",
PROTO = "proto",
PYTHON = "python",
RST = "rst",
RUBY = "ruby",
RUST = "rust",
SCALA = "scala",
SWIFT = "swift",
MARKDOWN = "markdown",
LATEX = "latex",
HTML = "html",
SOL = "sol",
CSHARP = "csharp",
COBOL = "cobol",
C = "c",
LUA = "lua",
PERL = "perl",
HASKELL = "haskell",
ELIXIR = "elixir",
POWERSHELL = "powershell"
}
export type ExtractParams = {
title?: TitleExtractorsArgs | boolean;
summary?: SummaryExtractArgs | boolean;
questions?: QuestionAnswerExtractArgs | boolean;
keywords?: KeywordExtractArgs | boolean;
};
export type ChunkOptions = {
headers?: [string, string][];
returnEachLine?: boolean;
sections?: [string, string][];
separator?: string;
separators?: string[];
isSeparatorRegex?: boolean;
size?: number;
maxSize?: number;
minSize?: number;
overlap?: number;
lengthFunction?: (text: string) => number;
keepSeparator?: boolean | 'start' | 'end';
addStartIndex?: boolean;
stripWhitespace?: boolean;
language?: Language;
ensureAscii?: boolean;
convertLists?: boolean;
encodingName?: TiktokenEncoding;
modelName?: TiktokenModel;
allowedSpecial?: Set<string> | 'all';
disallowedSpecial?: Set<string> | 'all';
stripHeaders?: boolean;
};
export type ChunkStrategy = 'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex';
export interface ChunkParams extends ChunkOptions {
strategy?: ChunkStrategy;
extract?: ExtractParams;
}
//# sourceMappingURL=types.d.ts.map