UNPKG

@elizaos/plugin-knowledge

Version:
1,260 lines (1,221 loc) 157 kB
import { convertPdfToTextFromBuffer, extractTextFromFileBuffer, fetchUrlContent, generateContentBasedId, isBinaryContentType, loadDocsFromPath, looksLikeBase64, normalizeS3Url, v4_default } from "./chunk-RFXW7QQK.js"; // src/index.ts import { logger as logger8 } from "@elizaos/core"; // src/types.ts import z from "zod"; var ModelConfigSchema = z.object({ // Provider configuration // NOTE: If EMBEDDING_PROVIDER is not specified, the plugin automatically assumes // plugin-openai is being used and will use OPENAI_EMBEDDING_MODEL and // OPENAI_EMBEDDING_DIMENSIONS for configuration EMBEDDING_PROVIDER: z.enum(["openai", "google"]).optional(), TEXT_PROVIDER: z.enum(["openai", "anthropic", "openrouter", "google"]).optional(), // API keys OPENAI_API_KEY: z.string().optional(), ANTHROPIC_API_KEY: z.string().optional(), OPENROUTER_API_KEY: z.string().optional(), GOOGLE_API_KEY: z.string().optional(), // Base URLs (optional for most providers) OPENAI_BASE_URL: z.string().optional(), ANTHROPIC_BASE_URL: z.string().optional(), OPENROUTER_BASE_URL: z.string().optional(), GOOGLE_BASE_URL: z.string().optional(), // Model names TEXT_EMBEDDING_MODEL: z.string(), TEXT_MODEL: z.string().optional(), // Token limits MAX_INPUT_TOKENS: z.string().or(z.number()).transform((val) => typeof val === "string" ? parseInt(val, 10) : val), MAX_OUTPUT_TOKENS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 4096), // Embedding dimension // For OpenAI: Only applies to text-embedding-3-small and text-embedding-3-large models // Default: 1536 dimensions EMBEDDING_DIMENSION: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 1536), // Contextual Knowledge settings CTX_KNOWLEDGE_ENABLED: z.boolean().default(false) }); var KnowledgeServiceType = { KNOWLEDGE: "knowledge" }; // src/config.ts import z2 from "zod"; import { logger } from "@elizaos/core"; function validateModelConfig(runtime) { try { const getSetting = (key, defaultValue) => { if (runtime) { return runtime.getSetting(key) || defaultValue; } return process.env[key] || defaultValue; }; const ctxKnowledgeEnabledSetting = getSetting("CTX_KNOWLEDGE_ENABLED"); const cleanSetting = ctxKnowledgeEnabledSetting?.toString().trim().toLowerCase(); const ctxKnowledgeEnabled = cleanSetting === "true"; logger.debug( `[Document Processor] CTX_KNOWLEDGE_ENABLED: '${ctxKnowledgeEnabledSetting}' \u2192 ${ctxKnowledgeEnabled} (runtime: ${!!runtime})` ); const embeddingProvider = getSetting("EMBEDDING_PROVIDER"); const assumePluginOpenAI = !embeddingProvider; if (assumePluginOpenAI) { const openaiApiKey2 = getSetting("OPENAI_API_KEY"); const openaiEmbeddingModel = getSetting("OPENAI_EMBEDDING_MODEL"); if (openaiApiKey2 && openaiEmbeddingModel) { logger.debug( "[Document Processor] EMBEDDING_PROVIDER not specified, using configuration from plugin-openai" ); } else { logger.debug( "[Document Processor] EMBEDDING_PROVIDER not specified. Assuming embeddings are provided by another plugin (e.g., plugin-google-genai)." ); } } const finalEmbeddingProvider = embeddingProvider; const textEmbeddingModel = getSetting("TEXT_EMBEDDING_MODEL") || getSetting("OPENAI_EMBEDDING_MODEL") || "text-embedding-3-small"; const embeddingDimension = getSetting("EMBEDDING_DIMENSION") || getSetting("OPENAI_EMBEDDING_DIMENSIONS") || "1536"; const openaiApiKey = getSetting("OPENAI_API_KEY"); const config = ModelConfigSchema.parse({ EMBEDDING_PROVIDER: finalEmbeddingProvider, TEXT_PROVIDER: getSetting("TEXT_PROVIDER"), OPENAI_API_KEY: openaiApiKey, ANTHROPIC_API_KEY: getSetting("ANTHROPIC_API_KEY"), OPENROUTER_API_KEY: getSetting("OPENROUTER_API_KEY"), GOOGLE_API_KEY: getSetting("GOOGLE_API_KEY"), OPENAI_BASE_URL: getSetting("OPENAI_BASE_URL"), ANTHROPIC_BASE_URL: getSetting("ANTHROPIC_BASE_URL"), OPENROUTER_BASE_URL: getSetting("OPENROUTER_BASE_URL"), GOOGLE_BASE_URL: getSetting("GOOGLE_BASE_URL"), TEXT_EMBEDDING_MODEL: textEmbeddingModel, TEXT_MODEL: getSetting("TEXT_MODEL"), MAX_INPUT_TOKENS: getSetting("MAX_INPUT_TOKENS", "4000"), MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"), EMBEDDING_DIMENSION: embeddingDimension, CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled }); validateConfigRequirements(config, assumePluginOpenAI); return config; } catch (error) { if (error instanceof z2.ZodError) { const issues = error.issues.map((issue) => `${issue.path.join(".")}: ${issue.message}`).join(", "); throw new Error(`Model configuration validation failed: ${issues}`); } throw error; } } function validateConfigRequirements(config, assumePluginOpenAI) { const embeddingProvider = config.EMBEDDING_PROVIDER; if (embeddingProvider === "openai" && !config.OPENAI_API_KEY) { throw new Error('OPENAI_API_KEY is required when EMBEDDING_PROVIDER is set to "openai"'); } if (embeddingProvider === "google" && !config.GOOGLE_API_KEY) { throw new Error('GOOGLE_API_KEY is required when EMBEDDING_PROVIDER is set to "google"'); } if (!embeddingProvider) { logger.debug( "[Document Processor] No EMBEDDING_PROVIDER specified. Embeddings will be handled by the runtime." ); } if (assumePluginOpenAI && config.OPENAI_API_KEY && !config.TEXT_EMBEDDING_MODEL) { throw new Error("OPENAI_EMBEDDING_MODEL is required when using plugin-openai configuration"); } if (config.CTX_KNOWLEDGE_ENABLED) { logger.debug("[Document Processor] CTX validation: Checking text generation settings..."); if (config.TEXT_PROVIDER === "openai" && !config.OPENAI_API_KEY) { throw new Error('OPENAI_API_KEY is required when TEXT_PROVIDER is set to "openai"'); } if (config.TEXT_PROVIDER === "anthropic" && !config.ANTHROPIC_API_KEY) { throw new Error('ANTHROPIC_API_KEY is required when TEXT_PROVIDER is set to "anthropic"'); } if (config.TEXT_PROVIDER === "openrouter" && !config.OPENROUTER_API_KEY) { throw new Error('OPENROUTER_API_KEY is required when TEXT_PROVIDER is set to "openrouter"'); } if (config.TEXT_PROVIDER === "google" && !config.GOOGLE_API_KEY) { throw new Error('GOOGLE_API_KEY is required when TEXT_PROVIDER is set to "google"'); } if (config.TEXT_PROVIDER === "openrouter") { const modelName = config.TEXT_MODEL?.toLowerCase() || ""; if (modelName.includes("claude") || modelName.includes("gemini")) { logger.debug( `[Document Processor] Using ${modelName} with OpenRouter. This configuration supports document caching for improved performance.` ); } } } else { logger.info("[Document Processor] Contextual Knowledge is DISABLED!"); logger.info("[Document Processor] This means documents will NOT be enriched with context."); if (assumePluginOpenAI) { logger.info( "[Document Processor] Embeddings will be handled by the runtime (e.g., plugin-openai, plugin-google-genai)." ); } else { logger.info( "[Document Processor] Using configured embedding provider for basic embeddings only." ); } } } async function getProviderRateLimits(runtime) { const config = validateModelConfig(runtime); const getSetting = (key, defaultValue) => { if (runtime) { return runtime.getSetting(key) || defaultValue; } return process.env[key] || defaultValue; }; const maxConcurrentRequests = parseInt(getSetting("MAX_CONCURRENT_REQUESTS", "30"), 10); const requestsPerMinute = parseInt(getSetting("REQUESTS_PER_MINUTE", "60"), 10); const tokensPerMinute = parseInt(getSetting("TOKENS_PER_MINUTE", "150000"), 10); const primaryProvider = config.TEXT_PROVIDER || config.EMBEDDING_PROVIDER; logger.debug( `[Document Processor] Rate limiting for ${primaryProvider}: ${requestsPerMinute} RPM, ${tokensPerMinute} TPM, ${maxConcurrentRequests} concurrent` ); switch (primaryProvider) { case "anthropic": return { maxConcurrentRequests, requestsPerMinute, tokensPerMinute, provider: "anthropic" }; case "openai": return { maxConcurrentRequests, requestsPerMinute: Math.min(requestsPerMinute, 3e3), tokensPerMinute: Math.min(tokensPerMinute, 15e4), provider: "openai" }; case "google": return { maxConcurrentRequests, requestsPerMinute: Math.min(requestsPerMinute, 60), tokensPerMinute: Math.min(tokensPerMinute, 1e5), provider: "google" }; default: return { maxConcurrentRequests, requestsPerMinute, tokensPerMinute, provider: primaryProvider || "unknown" }; } } // src/service.ts import { createUniqueUuid, logger as logger4, MemoryType as MemoryType2, ModelType as ModelType2, Semaphore, Service, splitChunks as splitChunks2 } from "@elizaos/core"; // src/document-processor.ts import { MemoryType, ModelType, logger as logger3, splitChunks } from "@elizaos/core"; // src/ctx-embeddings.ts var DEFAULT_CHUNK_TOKEN_SIZE = 500; var DEFAULT_CHUNK_OVERLAP_TOKENS = 100; var DEFAULT_CHARS_PER_TOKEN = 3.5; var CONTEXT_TARGETS = { DEFAULT: { MIN_TOKENS: 60, MAX_TOKENS: 120 }, PDF: { MIN_TOKENS: 80, MAX_TOKENS: 150 }, MATH_PDF: { MIN_TOKENS: 100, MAX_TOKENS: 180 }, CODE: { MIN_TOKENS: 100, MAX_TOKENS: 200 }, TECHNICAL: { MIN_TOKENS: 80, MAX_TOKENS: 160 } }; var SYSTEM_PROMPTS = { DEFAULT: "You are a precision text augmentation tool. Your task is to expand a given text chunk with its direct context from a larger document. You must: 1) Keep the original chunk intact; 2) Add critical context from surrounding text; 3) Never summarize or rephrase the original chunk; 4) Create contextually rich output for improved semantic retrieval.", CODE: "You are a precision code augmentation tool. Your task is to expand a given code chunk with necessary context from the larger codebase. You must: 1) Keep the original code chunk intact with exact syntax and indentation; 2) Add relevant imports, function signatures, or class definitions; 3) Include critical surrounding code context; 4) Create contextually rich output that maintains correct syntax.", PDF: "You are a precision document augmentation tool. Your task is to expand a given PDF text chunk with its direct context from the larger document. You must: 1) Keep the original chunk intact; 2) Add section headings, references, or figure captions; 3) Include text that immediately precedes and follows the chunk; 4) Create contextually rich output that maintains the document's original structure.", MATH_PDF: "You are a precision mathematical content augmentation tool. Your task is to expand a given mathematical text chunk with essential context. You must: 1) Keep original mathematical notations and expressions exactly as they appear; 2) Add relevant definitions, theorems, or equations from elsewhere in the document; 3) Preserve all LaTeX or mathematical formatting; 4) Create contextually rich output for improved mathematical comprehension.", TECHNICAL: "You are a precision technical documentation augmentation tool. Your task is to expand a technical document chunk with critical context. You must: 1) Keep the original chunk intact including all technical terminology; 2) Add relevant configuration examples, parameter definitions, or API references; 3) Include any prerequisite information; 4) Create contextually rich output that maintains technical accuracy." }; var CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE = ` <document> {doc_content} </document> Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. Follow these guidelines: 1. Identify the document's main topic and key information relevant to understanding this chunk 2. Include 2-3 sentences before the chunk that provide essential context 3. Include 2-3 sentences after the chunk that complete thoughts or provide resolution 4. For technical documents, include any definitions or explanations of terms used in the chunk 5. For narrative content, include character or setting information needed to understand the chunk 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. Do not use phrases like "this chunk discusses" - directly present the context 8. The total length should be between {min_tokens} and {max_tokens} tokens 9. Format the response as a single coherent paragraph Provide ONLY the enriched chunk text in your response:`; var CACHED_CHUNK_PROMPT_TEMPLATE = ` Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. Follow these guidelines: 1. Identify the document's main topic and key information relevant to understanding this chunk 2. Include 2-3 sentences before the chunk that provide essential context 3. Include 2-3 sentences after the chunk that complete thoughts or provide resolution 4. For technical documents, include any definitions or explanations of terms used in the chunk 5. For narrative content, include character or setting information needed to understand the chunk 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. Do not use phrases like "this chunk discusses" - directly present the context 8. The total length should be between {min_tokens} and {max_tokens} tokens 9. Format the response as a single coherent paragraph Provide ONLY the enriched chunk text in your response:`; var CACHED_CODE_CHUNK_PROMPT_TEMPLATE = ` Here is the chunk of code we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this code chunk by adding critical surrounding context. Follow these guidelines: 1. Preserve ALL code syntax, indentation, and comments exactly as they appear 2. Include any import statements, function definitions, or class declarations that this code depends on 3. Add necessary type definitions or interfaces that are referenced in this chunk 4. Include any crucial comments from elsewhere in the document that explain this code 5. If there are key variable declarations or initializations earlier in the document, include those 6. Keep the original chunk COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Do NOT include implementation details for functions that are only called but not defined in this chunk Provide ONLY the enriched code chunk in your response:`; var CACHED_MATH_PDF_PROMPT_TEMPLATE = ` Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. This document contains mathematical content that requires special handling. Follow these guidelines: 1. Preserve ALL mathematical notation exactly as it appears in the chunk 2. Include any defining equations, variables, or parameters mentioned earlier in the document that relate to this chunk 3. Add section/subsection names or figure references if they help situate the chunk 4. If variables or symbols are defined elsewhere in the document, include these definitions 5. If mathematical expressions appear corrupted, try to infer their meaning from context 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Format the response as a coherent mathematical explanation Provide ONLY the enriched chunk text in your response:`; var CACHED_TECHNICAL_PROMPT_TEMPLATE = ` Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. This appears to be technical documentation that requires special handling. Follow these guidelines: 1. Preserve ALL technical terminology, product names, and version numbers exactly as they appear 2. Include any prerequisite information or requirements mentioned earlier in the document 3. Add section/subsection headings or navigation path to situate this chunk within the document structure 4. Include any definitions of technical terms, acronyms, or jargon used in this chunk 5. If this chunk references specific configurations, include relevant parameter explanations 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Format the response maintaining any hierarchical structure present in the original Provide ONLY the enriched chunk text in your response:`; var MATH_PDF_PROMPT_TEMPLATE = ` <document> {doc_content} </document> Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. This document contains mathematical content that requires special handling. Follow these guidelines: 1. Preserve ALL mathematical notation exactly as it appears in the chunk 2. Include any defining equations, variables, or parameters mentioned earlier in the document that relate to this chunk 3. Add section/subsection names or figure references if they help situate the chunk 4. If variables or symbols are defined elsewhere in the document, include these definitions 5. If mathematical expressions appear corrupted, try to infer their meaning from context 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Format the response as a coherent mathematical explanation Provide ONLY the enriched chunk text in your response:`; var CODE_PROMPT_TEMPLATE = ` <document> {doc_content} </document> Here is the chunk of code we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this code chunk by adding critical surrounding context. Follow these guidelines: 1. Preserve ALL code syntax, indentation, and comments exactly as they appear 2. Include any import statements, function definitions, or class declarations that this code depends on 3. Add necessary type definitions or interfaces that are referenced in this chunk 4. Include any crucial comments from elsewhere in the document that explain this code 5. If there are key variable declarations or initializations earlier in the document, include those 6. Keep the original chunk COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Do NOT include implementation details for functions that are only called but not defined in this chunk Provide ONLY the enriched code chunk in your response:`; var TECHNICAL_PROMPT_TEMPLATE = ` <document> {doc_content} </document> Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. This appears to be technical documentation that requires special handling. Follow these guidelines: 1. Preserve ALL technical terminology, product names, and version numbers exactly as they appear 2. Include any prerequisite information or requirements mentioned earlier in the document 3. Add section/subsection headings or navigation path to situate this chunk within the document structure 4. Include any definitions of technical terms, acronyms, or jargon used in this chunk 5. If this chunk references specific configurations, include relevant parameter explanations 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Format the response maintaining any hierarchical structure present in the original Provide ONLY the enriched chunk text in your response:`; function getContextualizationPrompt(docContent, chunkContent, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS, promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE) { if (!docContent || !chunkContent) { console.warn("Document content or chunk content is missing for contextualization."); return "Error: Document or chunk content missing."; } const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN); if (chunkTokens > maxTokens * 0.7) { maxTokens = Math.ceil(chunkTokens * 1.3); minTokens = chunkTokens; } return promptTemplate.replace("{doc_content}", docContent).replace("{chunk_content}", chunkContent).replace("{min_tokens}", minTokens.toString()).replace("{max_tokens}", maxTokens.toString()); } function getCachingContextualizationPrompt(chunkContent, contentType, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS) { if (!chunkContent) { console.warn("Chunk content is missing for contextualization."); return { prompt: "Error: Chunk content missing.", systemPrompt: SYSTEM_PROMPTS.DEFAULT }; } const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN); if (chunkTokens > maxTokens * 0.7) { maxTokens = Math.ceil(chunkTokens * 1.3); minTokens = chunkTokens; } let promptTemplate = CACHED_CHUNK_PROMPT_TEMPLATE; let systemPrompt = SYSTEM_PROMPTS.DEFAULT; if (contentType) { if (contentType.includes("javascript") || contentType.includes("typescript") || contentType.includes("python") || contentType.includes("java") || contentType.includes("c++") || contentType.includes("code")) { promptTemplate = CACHED_CODE_CHUNK_PROMPT_TEMPLATE; systemPrompt = SYSTEM_PROMPTS.CODE; } else if (contentType.includes("pdf")) { if (containsMathematicalContent(chunkContent)) { promptTemplate = CACHED_MATH_PDF_PROMPT_TEMPLATE; systemPrompt = SYSTEM_PROMPTS.MATH_PDF; } else { systemPrompt = SYSTEM_PROMPTS.PDF; } } else if (contentType.includes("markdown") || contentType.includes("text/html") || isTechnicalDocumentation(chunkContent)) { promptTemplate = CACHED_TECHNICAL_PROMPT_TEMPLATE; systemPrompt = SYSTEM_PROMPTS.TECHNICAL; } } const formattedPrompt = promptTemplate.replace("{chunk_content}", chunkContent).replace("{min_tokens}", minTokens.toString()).replace("{max_tokens}", maxTokens.toString()); return { prompt: formattedPrompt, systemPrompt }; } function getPromptForMimeType(mimeType, docContent, chunkContent) { let minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS; let maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS; let promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE; if (mimeType.includes("pdf")) { if (containsMathematicalContent(docContent)) { minTokens = CONTEXT_TARGETS.MATH_PDF.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.MATH_PDF.MAX_TOKENS; promptTemplate = MATH_PDF_PROMPT_TEMPLATE; console.debug("Using mathematical PDF prompt template"); } else { minTokens = CONTEXT_TARGETS.PDF.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.PDF.MAX_TOKENS; console.debug("Using standard PDF settings"); } } else if (mimeType.includes("javascript") || mimeType.includes("typescript") || mimeType.includes("python") || mimeType.includes("java") || mimeType.includes("c++") || mimeType.includes("code")) { minTokens = CONTEXT_TARGETS.CODE.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.CODE.MAX_TOKENS; promptTemplate = CODE_PROMPT_TEMPLATE; console.debug("Using code prompt template"); } else if (isTechnicalDocumentation(docContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) { minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS; promptTemplate = TECHNICAL_PROMPT_TEMPLATE; } return getContextualizationPrompt(docContent, chunkContent, minTokens, maxTokens, promptTemplate); } function getCachingPromptForMimeType(mimeType, chunkContent) { let minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS; let maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS; if (mimeType.includes("pdf")) { if (containsMathematicalContent(chunkContent)) { minTokens = CONTEXT_TARGETS.MATH_PDF.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.MATH_PDF.MAX_TOKENS; } else { minTokens = CONTEXT_TARGETS.PDF.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.PDF.MAX_TOKENS; } } else if (mimeType.includes("javascript") || mimeType.includes("typescript") || mimeType.includes("python") || mimeType.includes("java") || mimeType.includes("c++") || mimeType.includes("code")) { minTokens = CONTEXT_TARGETS.CODE.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.CODE.MAX_TOKENS; } else if (isTechnicalDocumentation(chunkContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) { minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS; } return getCachingContextualizationPrompt(chunkContent, mimeType, minTokens, maxTokens); } function containsMathematicalContent(content) { const latexMathPatterns = [ /\$\$.+?\$\$/s, // Display math: $$ ... $$ /\$.+?\$/g, // Inline math: $ ... $ /\\begin\{equation\}/, // LaTeX equation environment /\\begin\{align\}/, // LaTeX align environment /\\sum_/, // Summation /\\int/, // Integral /\\frac\{/, // Fraction /\\sqrt\{/, // Square root /\\alpha|\\beta|\\gamma|\\delta|\\theta|\\lambda|\\sigma/, // Greek letters /\\nabla|\\partial/ // Differential operators ]; const generalMathPatterns = [ /[≠≤≥±∞∫∂∑∏√∈∉⊆⊇⊂⊃∪∩]/, // Mathematical symbols /\b[a-zA-Z]\^[0-9]/, // Simple exponents (e.g., x^2) /\(\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\)/, // Coordinates /\b[xyz]\s*=\s*-?\d+(\.\d+)?/, // Simple equations /\[\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\]/, // Vectors/matrices /\b\d+\s*×\s*\d+/ // Dimensions with × symbol ]; for (const pattern of latexMathPatterns) { if (pattern.test(content)) { return true; } } for (const pattern of generalMathPatterns) { if (pattern.test(content)) { return true; } } const mathKeywords = [ "theorem", "lemma", "proof", "equation", "function", "derivative", "integral", "matrix", "vector", "algorithm", "constraint", "coefficient" ]; const contentLower = content.toLowerCase(); const mathKeywordCount = mathKeywords.filter((keyword) => contentLower.includes(keyword)).length; return mathKeywordCount >= 2; } function isTechnicalDocumentation(content) { const technicalPatterns = [ /\b(version|v)\s*\d+\.\d+(\.\d+)?/i, // Version numbers /\b(api|sdk|cli)\b/i, // Technical acronyms /\b(http|https|ftp):\/\//i, // URLs /\b(GET|POST|PUT|DELETE)\b/, // HTTP methods /<\/?[a-z][\s\S]*>/i, // HTML/XML tags /\bREADME\b|\bCHANGELOG\b/i, // Common doc file names /\b(config|configuration)\b/i, // Configuration references /\b(parameter|param|argument|arg)\b/i // Parameter references ]; const docHeadings = [ /\b(Introduction|Overview|Getting Started|Installation|Usage|API Reference|Troubleshooting)\b/i ]; for (const pattern of [...technicalPatterns, ...docHeadings]) { if (pattern.test(content)) { return true; } } const listPatterns = [ /\d+\.\s.+\n\d+\.\s.+/, // Numbered lists /•\s.+\n•\s.+/, // Bullet points with • /\*\s.+\n\*\s.+/, // Bullet points with * /-\s.+\n-\s.+/ // Bullet points with - ]; for (const pattern of listPatterns) { if (pattern.test(content)) { return true; } } return false; } function getChunkWithContext(chunkContent, generatedContext) { if (!generatedContext || generatedContext.trim() === "") { console.warn("Generated context is empty. Falling back to original chunk content."); return chunkContent; } return generatedContext.trim(); } // src/llm.ts import { generateText as aiGenerateText, embed } from "ai"; import { createOpenAI } from "@ai-sdk/openai"; import { createAnthropic } from "@ai-sdk/anthropic"; import { createOpenRouter } from "@openrouter/ai-sdk-provider"; import { google } from "@ai-sdk/google"; import { logger as logger2 } from "@elizaos/core"; async function generateText(prompt, system, overrideConfig) { const config = validateModelConfig(); const provider = overrideConfig?.provider || config.TEXT_PROVIDER; const modelName = overrideConfig?.modelName || config.TEXT_MODEL; const maxTokens = overrideConfig?.maxTokens || config.MAX_OUTPUT_TOKENS; const autoCacheContextualRetrieval = overrideConfig?.autoCacheContextualRetrieval !== false; try { switch (provider) { case "anthropic": return await generateAnthropicText(prompt, system, modelName, maxTokens); case "openai": return await generateOpenAIText(prompt, system, modelName, maxTokens); case "openrouter": return await generateOpenRouterText( prompt, system, modelName, maxTokens, overrideConfig?.cacheDocument, overrideConfig?.cacheOptions, autoCacheContextualRetrieval ); case "google": return await generateGoogleText(prompt, system, modelName, maxTokens, config); default: throw new Error(`Unsupported text provider: ${provider}`); } } catch (error) { logger2.error(`[Document Processor] ${provider} ${modelName} error:`, error); throw error; } } async function generateAnthropicText(prompt, system, modelName, maxTokens) { const config = validateModelConfig(); const anthropic = createAnthropic({ apiKey: config.ANTHROPIC_API_KEY, baseURL: config.ANTHROPIC_BASE_URL }); const modelInstance = anthropic(modelName); const maxRetries = 3; for (let attempt = 0; attempt < maxRetries; attempt++) { try { const result = await aiGenerateText({ model: modelInstance, prompt, system, temperature: 0.3, maxTokens }); const totalTokens = result.usage.promptTokens + result.usage.completionTokens; logger2.debug( `[Document Processor] ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})` ); return result; } catch (error) { const isRateLimit = error?.status === 429 || error?.message?.includes("rate limit") || error?.message?.includes("429"); if (isRateLimit && attempt < maxRetries - 1) { const delay = Math.pow(2, attempt + 1) * 1e3; logger2.warn( `[Document Processor] Rate limit hit (${modelName}): attempt ${attempt + 1}/${maxRetries}, retrying in ${Math.round(delay / 1e3)}s` ); await new Promise((resolve) => setTimeout(resolve, delay)); continue; } throw error; } } throw new Error("Max retries exceeded for Anthropic text generation"); } async function generateOpenAIText(prompt, system, modelName, maxTokens) { const config = validateModelConfig(); const openai = createOpenAI({ apiKey: config.OPENAI_API_KEY, baseURL: config.OPENAI_BASE_URL }); const modelInstance = openai.chat(modelName); const result = await aiGenerateText({ model: modelInstance, prompt, system, temperature: 0.3, maxTokens }); const totalTokens = result.usage.promptTokens + result.usage.completionTokens; logger2.debug( `[Document Processor] OpenAI ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})` ); return result; } async function generateGoogleText(prompt, system, modelName, maxTokens, config) { const googleProvider = google; if (config.GOOGLE_API_KEY) { process.env.GOOGLE_GENERATIVE_AI_API_KEY = config.GOOGLE_API_KEY; } const modelInstance = googleProvider(modelName); const result = await aiGenerateText({ model: modelInstance, prompt, system, temperature: 0.3, maxTokens }); const totalTokens = result.usage.promptTokens + result.usage.completionTokens; logger2.debug( `[Document Processor] Google ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})` ); return result; } async function generateOpenRouterText(prompt, system, modelName, maxTokens, cacheDocument, cacheOptions, autoCacheContextualRetrieval = true) { const config = validateModelConfig(); const openrouter = createOpenRouter({ apiKey: config.OPENROUTER_API_KEY, baseURL: config.OPENROUTER_BASE_URL }); const modelInstance = openrouter.chat(modelName); const isClaudeModel = modelName.toLowerCase().includes("claude"); const isGeminiModel = modelName.toLowerCase().includes("gemini"); const isGemini25Model = modelName.toLowerCase().includes("gemini-2.5"); const supportsCaching = isClaudeModel || isGeminiModel; let documentForCaching = cacheDocument; if (!documentForCaching && autoCacheContextualRetrieval && supportsCaching) { const docMatch = prompt.match(/<document>([\s\S]*?)<\/document>/); if (docMatch && docMatch[1]) { documentForCaching = docMatch[1].trim(); logger2.debug( `[Document Processor] Auto-detected document for caching (${documentForCaching.length} chars)` ); } } if (documentForCaching && supportsCaching) { const effectiveCacheOptions = cacheOptions || { type: "ephemeral" }; let promptText = prompt; if (promptText.includes("<document>")) { promptText = promptText.replace(/<document>[\s\S]*?<\/document>/, "").trim(); } if (isClaudeModel) { return await generateClaudeWithCaching( promptText, system, modelInstance, modelName, maxTokens, documentForCaching ); } else if (isGeminiModel) { return await generateGeminiWithCaching( promptText, system, modelInstance, modelName, maxTokens, documentForCaching, isGemini25Model ); } } logger2.debug("[Document Processor] Using standard request without caching"); return await generateStandardOpenRouterText(prompt, system, modelInstance, modelName, maxTokens); } async function generateClaudeWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching) { logger2.debug(`[Document Processor] Using explicit prompt caching with Claude ${modelName}`); const messages = [ // System message with cached document (if system is provided) system ? { role: "system", content: [ { type: "text", text: system }, { type: "text", text: documentForCaching, cache_control: { type: "ephemeral" } } ] } : ( // User message with cached document (if no system message) { role: "user", content: [ { type: "text", text: "Document for context:" }, { type: "text", text: documentForCaching, cache_control: { type: "ephemeral" } }, { type: "text", text: promptText } ] } ), // Only add user message if system was provided (otherwise we included user above) system ? { role: "user", content: [ { type: "text", text: promptText } ] } : null ].filter(Boolean); logger2.debug("[Document Processor] Using Claude-specific caching structure"); const result = await aiGenerateText({ model: modelInstance, messages, temperature: 0.3, maxTokens, providerOptions: { openrouter: { usage: { include: true } } } }); logCacheMetrics(result); const totalTokens = result.usage.promptTokens + result.usage.completionTokens; logger2.debug( `[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})` ); return result; } async function generateGeminiWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching, isGemini25Model) { const usingImplicitCaching = isGemini25Model; const estimatedDocTokens = Math.ceil(documentForCaching.length / 4); const minTokensForImplicitCache = modelName.toLowerCase().includes("flash") ? 1028 : 2048; const likelyTriggersCaching = estimatedDocTokens >= minTokensForImplicitCache; if (usingImplicitCaching) { logger2.debug(`[Document Processor] Using Gemini 2.5 implicit caching with ${modelName}`); logger2.debug( `[Document Processor] Gemini 2.5 models automatically cache large prompts (no cache_control needed)` ); if (likelyTriggersCaching) { logger2.debug( `[Document Processor] Document ~${estimatedDocTokens} tokens exceeds ${minTokensForImplicitCache} token threshold for caching` ); } else { logger2.debug( `[Document Processor] Document ~${estimatedDocTokens} tokens may not meet ${minTokensForImplicitCache} token threshold for caching` ); } } else { logger2.debug(`[Document Processor] Using standard prompt format with Gemini ${modelName}`); logger2.debug( `[Document Processor] Note: Only Gemini 2.5 models support automatic implicit caching` ); } const geminiSystemPrefix = system ? `${system} ` : ""; const geminiPrompt = `${geminiSystemPrefix}${documentForCaching} ${promptText}`; const result = await aiGenerateText({ model: modelInstance, prompt: geminiPrompt, temperature: 0.3, maxTokens, providerOptions: { openrouter: { usage: { include: true // Include usage info to see cache metrics } } } }); logCacheMetrics(result); const totalTokens = result.usage.promptTokens + result.usage.completionTokens; const cachingType = usingImplicitCaching ? "implicit" : "standard"; logger2.debug( `[Document Processor] OpenRouter ${modelName} (${cachingType} caching): ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})` ); return result; } async function generateStandardOpenRouterText(prompt, system, modelInstance, modelName, maxTokens) { const result = await aiGenerateText({ model: modelInstance, prompt, system, temperature: 0.3, maxTokens, providerOptions: { openrouter: { usage: { include: true // Include usage info to see cache metrics } } } }); const totalTokens = result.usage.promptTokens + result.usage.completionTokens; logger2.debug( `[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.promptTokens}\u2192${result.usage.completionTokens})` ); return result; } function logCacheMetrics(result) { if (result.usage && result.usage.cacheTokens) { logger2.debug( `[Document Processor] Cache metrics - tokens: ${result.usage.cacheTokens}, discount: ${result.usage.cacheDiscount}` ); } } // src/document-processor.ts function estimateTokens(text) { return Math.ceil(text.length / 4); } function getCtxKnowledgeEnabled(runtime) { let result; let source; let rawValue; if (runtime) { rawValue = runtime.getSetting("CTX_KNOWLEDGE_ENABLED"); const cleanValue = rawValue?.toString().trim().toLowerCase(); result = cleanValue === "true"; source = "runtime.getSetting()"; } else { rawValue = process.env.CTX_KNOWLEDGE_ENABLED; const cleanValue = rawValue?.toString().trim().toLowerCase(); result = cleanValue === "true"; source = "process.env"; } if (process.env.NODE_ENV === "development" && rawValue && !result) { logger3.debug(`[Document Processor] CTX config mismatch - ${source}: '${rawValue}' \u2192 ${result}`); } return result; } function shouldUseCustomLLM() { const textProvider = process.env.TEXT_PROVIDER; const textModel = process.env.TEXT_MODEL; if (!textProvider || !textModel) { return false; } switch (textProvider.toLowerCase()) { case "openrouter": return !!process.env.OPENROUTER_API_KEY; case "openai": return !!process.env.OPENAI_API_KEY; case "anthropic": return !!process.env.ANTHROPIC_API_KEY; case "google": return !!process.env.GOOGLE_API_KEY; default: return false; } } var useCustomLLM = shouldUseCustomLLM(); async function processFragmentsSynchronously({ runtime, documentId, fullDocumentText, agentId, contentType, roomId, entityId, worldId, documentTitle }) { if (!fullDocumentText || fullDocumentText.trim() === "") { logger3.warn(`No text content available to chunk for document ${documentId}.`); return 0; } const chunks = await splitDocumentIntoChunks(fullDocumentText); if (chunks.length === 0) { logger3.warn(`No chunks generated from text for ${documentId}. No fragments to save.`); return 0; } const docName = documentTitle || documentId.substring(0, 8); logger3.info(`[Document Processor] "${docName}": Split into ${chunks.length} chunks`); const providerLimits = await getProviderRateLimits(); const CONCURRENCY_LIMIT = Math.min(30, providerLimits.maxConcurrentRequests || 30); const rateLimiter = createRateLimiter( providerLimits.requestsPerMinute || 60, providerLimits.tokensPerMinute ); logger3.debug( `[Document Processor] Rate limits: ${providerLimits.requestsPerMinute} RPM, ${providerLimits.tokensPerMinute} TPM (${providerLimits.provider}, concurrency: ${CONCURRENCY_LIMIT})` ); const { savedCount, failedCount } = await processAndSaveFragments({ runtime, documentId, chunks, fullDocumentText, contentType, agentId, roomId: roomId || agentId, entityId: entityId || agentId, worldId: worldId || agentId, concurrencyLimit: CONCURRENCY_LIMIT, rateLimiter, documentTitle }); const successRate = (savedCount / chunks.length * 100).toFixed(1); if (failedCount > 0) { logger3.warn( `[Document Processor] "${docName}": ${failedCount}/${chunks.length} chunks failed processing` ); } logger3.info( `[Document Processor] "${docName}" complete: ${savedCount}/${chunks.length} fragments saved (${successRate}% success)` ); logKnowledgeGenerationSummary({ documentId, totalChunks: chunks.length, savedCount, failedCount, successRate: parseFloat(successRate), ctxEnabled: getCtxKnowledgeEnabled(runtime), providerLimits }); return savedCount; } async function extractTextFromDocument(fileBuffer, contentType, originalFilename) { if (!fileBuffer || fileBuffer.length === 0) { throw new Error(`Empty file buffer provided for ${originalFilename}. Cannot extract text.`); } try { if (contentType === "application/pdf") { logger3.debug(`Extracting text from PDF: ${originalFilename}`); return await convertPdfToTextFromBuffer(fileBuffer, originalFilename); } else { logger3.debug(`Extracting text from non-PDF: ${originalFilename} (Type: ${contentType})`); if (contentType.includes("text/") || contentType.includes("application/json") || contentType.includes("application/xml")) { try { return fileBuffer.toString("utf8"); } catch (textError) { logger3.warn( `Failed to decode ${originalFilename} as UTF-8, falling back to binary extraction` ); } } return await extractTextFromFileBuffer(fileBuffer, contentType, originalFilename); } } catch (error) { logger3.error(`Error extracting text from ${originalFilename}: ${error.message}`); throw new Error(`Failed to extract text from ${originalFilename}: ${error.message}`); } } function createDocumentMemory({ text, agentId, clientDocumentId, originalFilename, contentType, worldId, fileSize, documentId, customMetadata }) { const fileExt = originalFilename.split(".").pop()?.toLowerCase() || ""; const title = originalFilename.replace(`.${fileExt}`, ""); const docId = documentId || v4_default(); return { id: docId, agentId, roomId: agentId, worldId, entityId: agentId, content: { text }, metadata: { type: MemoryType.DOCUMENT, documentId: clientDocumentId, originalFilename, contentType, title, fileExt, fileSize, source: "rag-service-main-upload", timestamp: Date.now(), // Merge custom metadata if provided ...customMetadata || {} } }; } async function splitDocumentIntoChunks(documentText) { const tokenChunkSize = DEFAULT_CHUNK_TOKEN_SIZE; const tokenChunkOverlap = DEFAULT_CHUNK_OVERLAP_TOKENS; const targetCharChunkSize = Math.round(tokenChunkSize * DEFAULT_CHARS_PER_TOKEN); const targetCharChunkOverlap = Math.round(tokenChunkOverlap * DEFAULT_CHARS_PER_TOKEN); logger3.debug( `Using core splitChunks with settings: tokenChunkSize=${tokenChunkSize}, tokenChunkOverlap=${tokenChunkOverlap}, charChunkSize=${targetCharChunkSize}, charChunkOverlap=${targetCharChunkOverlap}` ); return await splitChunks(documentText, tokenChunkSize, tokenChunkOverlap); } async function processAndSaveFragments({ runtime, documentId, chunks, fullDocumentText, contentType, agentId, roomId, entityId, worldId, concurrencyLimit, rateLimiter, documentTitle }) { let savedCount = 0; let failedCount = 0; const failedChunks = []; for (let i = 0; i < chunks.length; i += concurrencyLimit) { const batchChunks = chunks.slice(i, i + concurrencyLimit); const batchOriginalIndices = Array.from({ length: batchChunks.length }, (_, k) => i + k); logger3.debug( `[Document Processor] Batch ${Math.floor(i / concurrencyLimit) + 1}/${Math.ceil(chunks.length / concurrencyLimit)}: processing ${batchChunks.length} chunks (${batchOriginalIndices[0]}-${batchOriginalIndices[batchOriginalIndices.length - 1]})` ); const contextualizedChunks = await getContextualizedChunks( runtime, fullDocumentText, batchChunks, contentType, batchOriginalIndices, documentTitle ); const embeddingResults = await generateEmbeddingsForChunks( runtime, contextualizedChunks, rateLimiter ); for (const result of embeddingResults) { const originalChunkIndex = result.index; if (!result.success) { failedCount++; failedChunks.push(originalChunkIndex); logger3.warn(`Failed to process chunk ${originalChunkIndex} for document ${documentId}`); continue; } const contextualizedChunkText = result.text; const embedding = result.embedding; if (!embedding || embedding.length === 0) { logger3.warn( `Zero vector detected for chunk ${originalChunkIndex} (document ${documentId}). Embedding: ${JSON.stringify(result.embedding)}` ); failedCount++; failedChunks.push(originalChunkIndex); continue; } try { const fragmentMemory = { id: v4_default(), agentId, roomId: roomId || agentId, worldId: worldId || agentId, entityId: entityId || agentId, embedding, content: { text: contextualizedChunkText }, metadata: { type: MemoryType.FRAGMENT, documentId, position: originalChunkIndex, timestamp: Date.now(), source: "rag-service-fragment-sync" } }; await runtime.createMemory(fragmentMemory, "knowledge"); if (originalChunkIndex === chunks.length - 1) { const docName = documentTitle || documentId.substring(0, 8); logger3.info( `[Document Processor] "${docName}": All ${chunks.length} chunks processed successfully` ); } savedCount++; } catch (saveError) { logger3.error( `Error saving chunk ${originalChunkIndex} to database: ${saveError.message}`, saveError.stack ); failedCount++; failedChunks.push(originalChunkIndex); } } if (i + concurrencyLimit < chunks.length) { await new Promise((resolve) => setTimeout(resolve, 500)); } } return { savedCount, failedCount, failedChunks }; } async function generateEmbeddingsForChunks(runtime, contextualizedChunks, rateLimiter) { const validChunks = contextualizedChunks.filter((chunk) => chunk.success); const failedChunks = contextualizedChunks.filter((chunk) => !chunk.success); if (validChunks.length === 0) { return failedChunks.map((chunk) => ({ success: false, index: chunk.index, error: new Error("Chunk processing failed"), text: chunk.contextualizedText })); } return await Promise.all( contextualizedChunks.map(async (contextualizedChunk) => { if (!contextualizedChunk.success) { return { success: false, index: contextualizedChunk.index, error: new Error("Chunk processing failed"), text: contextualizedChunk.contextualizedText };