@mastra/rag
Version:
The Retrieval-Augmented Generation (RAG) module contains document processing and embedding utilities.
149 lines • 4.22 kB
TypeScript
import type { TiktokenEncoding, TiktokenModel } from 'js-tiktoken';
import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs } from './extractors/index.js';
/**
 * Language identifiers that can be supplied as `RecursiveChunkOptions.language`.
 *
 * This is a string enum: each member's runtime value is the lowercase
 * identifier shown on its right-hand side. Members are listed alphabetically;
 * order carries no meaning because every value is explicit.
 */
export declare enum Language {
  C = "c",
  COBOL = "cobol",
  CPP = "cpp",
  CSHARP = "csharp",
  ELIXIR = "elixir",
  GO = "go",
  HASKELL = "haskell",
  HTML = "html",
  JAVA = "java",
  JS = "js",
  KOTLIN = "kotlin",
  LATEX = "latex",
  LUA = "lua",
  MARKDOWN = "markdown",
  PERL = "perl",
  PHP = "php",
  POWERSHELL = "powershell",
  PROTO = "proto",
  PYTHON = "python",
  RST = "rst",
  RUBY = "ruby",
  RUST = "rust",
  SCALA = "scala",
  SOL = "sol",
  SWIFT = "swift",
  TS = "ts"
}
/**
 * Per-extractor settings, accepted under the optional `extract` key of `ChunkParams`.
 *
 * Every field is optional and takes either a boolean or that extractor's own
 * argument object.
 * NOTE(review): presumably `true` turns the extractor on with its defaults and
 * `false`/omitted leaves it off — confirm against the implementation.
 */
export type ExtractParams = {
  title?: boolean | TitleExtractorsArgs;
  summary?: boolean | SummaryExtractArgs;
  questions?: boolean | QuestionAnswerExtractArgs;
  keywords?: boolean | KeywordExtractArgs;
};
/**
 * Options common to every chunking strategy; each strategy-specific option
 * type in this file is declared as an intersection with this type.
 *
 * All fields are optional here.
 */
export type BaseChunkOptions = {
  /**
   * @deprecated Use `maxSize` instead. Will be removed in next major version.
   */
  size?: number;
  /** Replacement for the deprecated `size` option. */
  maxSize?: number;
  /** NOTE(review): presumably how much consecutive chunks share — confirm units with the implementation. */
  overlap?: number;
  /** Callback mapping a piece of text to a numeric length. */
  lengthFunction?: (text: string) => number;
  /** Either a plain on/off flag or an explicit `'start'` / `'end'` placement. */
  keepSeparator?: 'start' | 'end' | boolean;
  addStartIndex?: boolean;
  stripWhitespace?: boolean;
};
/**
 * Options for the `'character'` strategy: the shared `BaseChunkOptions` plus
 * a single separator and a flag saying whether that separator is a regex.
 */
export type CharacterChunkOptions = {
  separator?: string;
  isSeparatorRegex?: boolean;
} & BaseChunkOptions;
/**
 * Options for the `'recursive'` strategy: the shared `BaseChunkOptions` plus
 * a list of separators, a regex flag for them, and an optional `Language`.
 * NOTE(review): how `language` interacts with an explicit `separators` list is
 * not visible in this declaration — confirm with the implementation.
 */
export type RecursiveChunkOptions = {
  separators?: string[];
  isSeparatorRegex?: boolean;
  language?: Language;
} & BaseChunkOptions;
/**
 * Options for the `'token'` strategy: the shared `BaseChunkOptions` plus
 * tokenizer selection (typed by `js-tiktoken`'s `TiktokenEncoding` /
 * `TiktokenModel`) and special-token allow/deny settings.
 */
export type TokenChunkOptions = {
  encodingName?: TiktokenEncoding;
  modelName?: TiktokenModel;
  /** Either an explicit set of strings or the literal `'all'`. */
  allowedSpecial?: 'all' | Set<string>;
  /** Either an explicit set of strings or the literal `'all'`. */
  disallowedSpecial?: 'all' | Set<string>;
} & BaseChunkOptions;
/**
 * Options for the `'markdown'` strategy: the shared `BaseChunkOptions` plus
 * header-related settings.
 */
export type MarkdownChunkOptions = {
  /**
   * List of string pairs.
   * NOTE(review): presumably `[header marker, metadata name]` — confirm.
   */
  headers?: [string, string][];
  returnEachLine?: boolean;
  stripHeaders?: boolean;
} & BaseChunkOptions;
/**
 * Options for the `'semantic-markdown'` strategy: the shared
 * `BaseChunkOptions`, a numeric `joinThreshold`, and the same tokenizer
 * fields that `TokenChunkOptions` declares.
 */
export type SemanticMarkdownChunkOptions = {
  /** NOTE(review): unit and meaning of the threshold are not visible here — confirm. */
  joinThreshold?: number;
  encodingName?: TiktokenEncoding;
  modelName?: TiktokenModel;
  /** Either an explicit set of strings or the literal `'all'`. */
  allowedSpecial?: 'all' | Set<string>;
  /** Either an explicit set of strings or the literal `'all'`. */
  disallowedSpecial?: 'all' | Set<string>;
} & BaseChunkOptions;
/**
 * Options for the `'html'` strategy.
 *
 * On top of the shared `BaseChunkOptions`, exactly one of `headers` or
 * `sections` must be supplied: each branch makes one of them required and
 * types the other as `never`, so passing both is a type error.
 *
 * `returnEachLine` is declared once, in the trailing intersection, so it
 * applies uniformly to both branches.
 * NOTE(review): the string pairs are presumably `[tag/selector, metadata name]`
 * — confirm against the implementation.
 */
export type HTMLChunkOptions = BaseChunkOptions &
  (
    | {
        headers: [string, string][];
        sections?: never;
      }
    | {
        sections: [string, string][];
        headers?: never;
      }
  ) & {
    returnEachLine?: boolean;
  };
/**
 * Options for the `'json'` strategy: the shared `BaseChunkOptions` plus a
 * numeric `minSize` and two boolean switches.
 * NOTE(review): the exact effect of `ensureAscii` and `convertLists` is not
 * visible in this declaration — confirm with the implementation.
 */
export type JsonChunkOptions = {
  minSize?: number;
  ensureAscii?: boolean;
  convertLists?: boolean;
} & BaseChunkOptions;
/**
 * Options for the `'latex'` strategy. It adds no fields of its own beyond
 * `BaseChunkOptions`.
 * NOTE(review): the trailing `& {}` contributes no properties; confirm that
 * simplifying it away would not alter type-checking before touching it.
 */
export type LatexChunkOptions = BaseChunkOptions & {};
/**
 * Options for the `'sentence'` strategy.
 *
 * Unlike the other strategies, `maxSize` is required here: this type
 * re-declares it without `?`, which tightens the optional `maxSize` inherited
 * from `BaseChunkOptions`.
 */
export type SentenceChunkOptions = {
  maxSize: number;
  minSize?: number;
  targetSize?: number;
  /** NOTE(review): presumably the strings that terminate a sentence — confirm. */
  sentenceEnders?: string[];
  fallbackToWords?: boolean;
  fallbackToCharacters?: boolean;
} & BaseChunkOptions;
/**
 * Lookup table from each chunking strategy name to the option type that
 * strategy accepts. Its keys are the same nine names listed in `ChunkStrategy`.
 */
export type StrategyOptions = {
  'recursive': RecursiveChunkOptions;
  'character': CharacterChunkOptions;
  'token': TokenChunkOptions;
  'markdown': MarkdownChunkOptions;
  'html': HTMLChunkOptions;
  'json': JsonChunkOptions;
  'latex': LatexChunkOptions;
  'sentence': SentenceChunkOptions;
  'semantic-markdown': SemanticMarkdownChunkOptions;
};
/** Name of a chunking strategy; one of the nine string literals below. */
export type ChunkStrategy =
  | 'recursive'
  | 'character'
  | 'token'
  | 'markdown'
  | 'html'
  | 'json'
  | 'latex'
  | 'sentence'
  | 'semantic-markdown';
/**
 * Parameters for a chunking call: a union with one member per strategy.
 *
 * Each member combines a `strategy` tag, that strategy's option type, and an
 * optional `extract` configuration. The `strategy` tag is required on every
 * member except `'character'`, where it is declared optional.
 * NOTE(review): the optional tag means an object without `strategy` is typed
 * as the character variant — confirm this matches the runtime default.
 */
export type ChunkParams =
  | ({ strategy?: 'character'; extract?: ExtractParams } & CharacterChunkOptions)
  | ({ strategy: 'recursive'; extract?: ExtractParams } & RecursiveChunkOptions)
  | ({ strategy: 'token'; extract?: ExtractParams } & TokenChunkOptions)
  | ({ strategy: 'markdown'; extract?: ExtractParams } & MarkdownChunkOptions)
  | ({ strategy: 'html'; extract?: ExtractParams } & HTMLChunkOptions)
  | ({ strategy: 'json'; extract?: ExtractParams } & JsonChunkOptions)
  | ({ strategy: 'latex'; extract?: ExtractParams } & LatexChunkOptions)
  | ({ strategy: 'sentence'; extract?: ExtractParams } & SentenceChunkOptions)
  | ({ strategy: 'semantic-markdown'; extract?: ExtractParams } & SemanticMarkdownChunkOptions);
//# sourceMappingURL=types.d.ts.map