UNPKG

@elizaos/plugin-knowledge

Version:

Plugin for Knowledge

elizaos-plugins/plugin-knowledge

1,175 lines (1,126 loc) • 198 kB

JavaScript

// src/index.ts import { logger as logger11 } from "@elizaos/core"; // src/service.ts import { createUniqueUuid, logger as logger6, MemoryType as MemoryType2, ModelType as ModelType2, Semaphore, Service, splitChunks as splitChunks2 } from "@elizaos/core"; // src/document-processor.ts import { MemoryType, ModelType, logger as logger4, splitChunks } from "@elizaos/core"; // node_modules/uuid/dist-node/regex.js var regex_default = /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i; // node_modules/uuid/dist-node/validate.js function validate(uuid) { return typeof uuid === "string" && regex_default.test(uuid); } var validate_default = validate; // node_modules/uuid/dist-node/parse.js function parse(uuid) { if (!validate_default(uuid)) { throw TypeError("Invalid UUID"); } let v; return Uint8Array.of((v = parseInt(uuid.slice(0, 8), 16)) >>> 24, v >>> 16 & 255, v >>> 8 & 255, v & 255, (v = parseInt(uuid.slice(9, 13), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(14, 18), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(19, 23), 16)) >>> 8, v & 255, (v = parseInt(uuid.slice(24, 36), 16)) / 1099511627776 & 255, v / 4294967296 & 255, v >>> 24 & 255, v >>> 16 & 255, v >>> 8 & 255, v & 255); } var parse_default = parse; // node_modules/uuid/dist-node/stringify.js var byteToHex = []; for (let i = 0; i < 256; ++i) { byteToHex.push((i + 256).toString(16).slice(1)); } function unsafeStringify(arr, offset = 0) { return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase(); } // node_modules/uuid/dist-node/rng.js import { randomFillSync } from "crypto"; var rnds8Pool = new Uint8Array(256); var poolPtr = rnds8Pool.length; function rng() { if (poolPtr > rnds8Pool.length - 16) { randomFillSync(rnds8Pool); poolPtr = 0; } return rnds8Pool.slice(poolPtr, poolPtr += 16); } // node_modules/uuid/dist-node/v35.js function stringToBytes(str) { str = unescape(encodeURIComponent(str)); const bytes = new Uint8Array(str.length); for (let i = 0; i < str.length; ++i) { bytes[i] = str.charCodeAt(i); } return bytes; } var DNS = "6ba7b810-9dad-11d1-80b4-00c04fd430c8"; var URL2 = "6ba7b811-9dad-11d1-80b4-00c04fd430c8"; function v35(version, hash, value, namespace, buf, offset) { const valueBytes = typeof value === "string" ? stringToBytes(value) : value; const namespaceBytes = typeof namespace === "string" ? parse_default(namespace) : namespace; if (typeof namespace === "string") { namespace = parse_default(namespace); } if (namespace?.length !== 16) { throw TypeError("Namespace must be array-like (16 iterable integer values, 0-255)"); } let bytes = new Uint8Array(16 + valueBytes.length); bytes.set(namespaceBytes); bytes.set(valueBytes, namespaceBytes.length); bytes = hash(bytes); bytes[6] = bytes[6] & 15 | version; bytes[8] = bytes[8] & 63 | 128; if (buf) { offset = offset || 0; for (let i = 0; i < 16; ++i) { buf[offset + i] = bytes[i]; } return buf; } return unsafeStringify(bytes); } // node_modules/uuid/dist-node/native.js import { randomUUID } from "crypto"; var native_default = { randomUUID }; // node_modules/uuid/dist-node/v4.js function _v4(options, buf, offset) { options = options || {}; const rnds = options.random ?? options.rng?.() ?? rng(); if (rnds.length < 16) { throw new Error("Random bytes length must be >= 16"); } rnds[6] = rnds[6] & 15 | 64; rnds[8] = rnds[8] & 63 | 128; if (buf) { offset = offset || 0; if (offset < 0 || offset + 16 > buf.length) { throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`); } for (let i = 0; i < 16; ++i) { buf[offset + i] = rnds[i]; } return buf; } return unsafeStringify(rnds); } function v4(options, buf, offset) { if (native_default.randomUUID && !buf && !options) { return native_default.randomUUID(); } return _v4(options, buf, offset); } var v4_default = v4; // node_modules/uuid/dist-node/sha1.js import { createHash } from "crypto"; function sha1(bytes) { if (Array.isArray(bytes)) { bytes = Buffer.from(bytes); } else if (typeof bytes === "string") { bytes = Buffer.from(bytes, "utf8"); } return createHash("sha1").update(bytes).digest(); } var sha1_default = sha1; // node_modules/uuid/dist-node/v5.js function v5(value, namespace, buf, offset) { return v35(80, sha1_default, value, namespace, buf, offset); } v5.DNS = DNS; v5.URL = URL2; var v5_default = v5; // src/types.ts import z from "zod"; var ModelConfigSchema = z.object({ // Provider configuration // NOTE: If EMBEDDING_PROVIDER is not specified, the plugin automatically assumes // plugin-openai is being used and will use OPENAI_EMBEDDING_MODEL and // OPENAI_EMBEDDING_DIMENSIONS for configuration EMBEDDING_PROVIDER: z.enum(["openai", "google"]).optional(), TEXT_PROVIDER: z.enum(["openai", "anthropic", "openrouter", "google"]).optional(), // API keys OPENAI_API_KEY: z.string().optional(), ANTHROPIC_API_KEY: z.string().optional(), OPENROUTER_API_KEY: z.string().optional(), GOOGLE_API_KEY: z.string().optional(), // Base URLs (optional for most providers) OPENAI_BASE_URL: z.string().optional(), ANTHROPIC_BASE_URL: z.string().optional(), OPENROUTER_BASE_URL: z.string().optional(), GOOGLE_BASE_URL: z.string().optional(), // Model names TEXT_EMBEDDING_MODEL: z.string(), TEXT_MODEL: z.string().optional(), // Token limits MAX_INPUT_TOKENS: z.string().or(z.number()).transform((val) => typeof val === "string" ? parseInt(val, 10) : val), MAX_OUTPUT_TOKENS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 4096), // Embedding dimension // For OpenAI: Only applies to text-embedding-3-small and text-embedding-3-large models // Default: 1536 dimensions EMBEDDING_DIMENSION: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 1536), // config setting LOAD_DOCS_ON_STARTUP: z.boolean().default(false), // Contextual Knowledge settings CTX_KNOWLEDGE_ENABLED: z.boolean().default(false), // Rate limiting settings // Set RATE_LIMIT_ENABLED=false to disable all rate limiting for fast uploads // Useful when using APIs without rate limits (e.g., self-hosted models) // High defaults optimized for Vercel gateway / high-throughput APIs RATE_LIMIT_ENABLED: z.boolean().default(true), // Maximum concurrent requests (default: 150, set higher for faster processing) MAX_CONCURRENT_REQUESTS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 150), // Requests per minute limit (default: 300) REQUESTS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 300), // Tokens per minute limit (default: 750000) TOKENS_PER_MINUTE: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 75e4), // Delay between batches in milliseconds (default: 100, set to 0 for no delay) BATCH_DELAY_MS: z.string().or(z.number()).optional().transform((val) => val ? typeof val === "string" ? parseInt(val, 10) : val : 100) }); var KnowledgeServiceType = { KNOWLEDGE: "knowledge" }; // src/config.ts import z2 from "zod"; import { logger } from "@elizaos/core"; var parseBooleanEnv = (value) => { if (typeof value === "boolean") return value; if (typeof value === "string") return value.toLowerCase() === "true"; return false; }; function validateModelConfig(runtime) { try { const getSetting = (key, defaultValue) => { if (runtime) { return runtime.getSetting(key) || process.env[key] || defaultValue; } return process.env[key] || defaultValue; }; const ctxKnowledgeEnabled = parseBooleanEnv(getSetting("CTX_KNOWLEDGE_ENABLED", "false")); logger.debug( `[Document Processor] CTX_KNOWLEDGE_ENABLED: '${ctxKnowledgeEnabled} (runtime: ${!!runtime})` ); const embeddingProvider = getSetting("EMBEDDING_PROVIDER"); const assumePluginOpenAI = !embeddingProvider; if (assumePluginOpenAI) { const openaiApiKey2 = getSetting("OPENAI_API_KEY"); const openaiEmbeddingModel = getSetting("OPENAI_EMBEDDING_MODEL"); if (openaiApiKey2 && openaiEmbeddingModel) { logger.debug( "[Document Processor] EMBEDDING_PROVIDER not specified, using configuration from plugin-openai" ); } else { logger.debug( "[Document Processor] EMBEDDING_PROVIDER not specified. Assuming embeddings are provided by another plugin (e.g., plugin-google-genai)." ); } } const finalEmbeddingProvider = embeddingProvider; const textEmbeddingModel = getSetting("TEXT_EMBEDDING_MODEL") || getSetting("OPENAI_EMBEDDING_MODEL") || "text-embedding-3-small"; const embeddingDimension = getSetting("EMBEDDING_DIMENSION") || getSetting("OPENAI_EMBEDDING_DIMENSIONS") || "1536"; const openaiApiKey = getSetting("OPENAI_API_KEY"); const config = ModelConfigSchema.parse({ EMBEDDING_PROVIDER: finalEmbeddingProvider, TEXT_PROVIDER: getSetting("TEXT_PROVIDER"), OPENAI_API_KEY: openaiApiKey, ANTHROPIC_API_KEY: getSetting("ANTHROPIC_API_KEY"), OPENROUTER_API_KEY: getSetting("OPENROUTER_API_KEY"), GOOGLE_API_KEY: getSetting("GOOGLE_API_KEY"), OPENAI_BASE_URL: getSetting("OPENAI_BASE_URL"), ANTHROPIC_BASE_URL: getSetting("ANTHROPIC_BASE_URL"), OPENROUTER_BASE_URL: getSetting("OPENROUTER_BASE_URL"), GOOGLE_BASE_URL: getSetting("GOOGLE_BASE_URL"), TEXT_EMBEDDING_MODEL: textEmbeddingModel, TEXT_MODEL: getSetting("TEXT_MODEL"), MAX_INPUT_TOKENS: getSetting("MAX_INPUT_TOKENS", "4000"), MAX_OUTPUT_TOKENS: getSetting("MAX_OUTPUT_TOKENS", "4096"), EMBEDDING_DIMENSION: embeddingDimension, LOAD_DOCS_ON_STARTUP: parseBooleanEnv(getSetting("LOAD_DOCS_ON_STARTUP")), CTX_KNOWLEDGE_ENABLED: ctxKnowledgeEnabled, // Rate limiting settings - optimized for batch embeddings // With batch embeddings, we send 100 texts in ONE API call // 935 chunks / 100 = ~10 API calls instead of 935! RATE_LIMIT_ENABLED: parseBooleanEnv(getSetting("RATE_LIMIT_ENABLED", "true")), MAX_CONCURRENT_REQUESTS: getSetting("MAX_CONCURRENT_REQUESTS", "100"), REQUESTS_PER_MINUTE: getSetting("REQUESTS_PER_MINUTE", "500"), TOKENS_PER_MINUTE: getSetting("TOKENS_PER_MINUTE", "1000000"), BATCH_DELAY_MS: getSetting("BATCH_DELAY_MS", "100") }); validateConfigRequirements(config, assumePluginOpenAI); return config; } catch (error) { if (error instanceof z2.ZodError) { const issues = error.issues.map((issue) => `${issue.path.join(".")}: ${issue.message}`).join(", "); throw new Error(`Model configuration validation failed: ${issues}`); } throw error; } } function validateConfigRequirements(config, assumePluginOpenAI) { const embeddingProvider = config.EMBEDDING_PROVIDER; if (embeddingProvider === "openai" && !config.OPENAI_API_KEY) { throw new Error('OPENAI_API_KEY is required when EMBEDDING_PROVIDER is set to "openai"'); } if (embeddingProvider === "google" && !config.GOOGLE_API_KEY) { throw new Error('GOOGLE_API_KEY is required when EMBEDDING_PROVIDER is set to "google"'); } if (!embeddingProvider) { logger.debug( "[Document Processor] No EMBEDDING_PROVIDER specified. Embeddings will be handled by the runtime." ); } if (assumePluginOpenAI && config.OPENAI_API_KEY && !config.TEXT_EMBEDDING_MODEL) { throw new Error("OPENAI_EMBEDDING_MODEL is required when using plugin-openai configuration"); } if (config.CTX_KNOWLEDGE_ENABLED) { logger.debug("[Document Processor] CTX validation: Checking text generation settings..."); if (config.TEXT_PROVIDER === "openai" && !config.OPENAI_API_KEY) { throw new Error('OPENAI_API_KEY is required when TEXT_PROVIDER is set to "openai"'); } if (config.TEXT_PROVIDER === "anthropic" && !config.ANTHROPIC_API_KEY) { throw new Error('ANTHROPIC_API_KEY is required when TEXT_PROVIDER is set to "anthropic"'); } if (config.TEXT_PROVIDER === "openrouter" && !config.OPENROUTER_API_KEY) { throw new Error('OPENROUTER_API_KEY is required when TEXT_PROVIDER is set to "openrouter"'); } if (config.TEXT_PROVIDER === "google" && !config.GOOGLE_API_KEY) { throw new Error('GOOGLE_API_KEY is required when TEXT_PROVIDER is set to "google"'); } if (config.TEXT_PROVIDER === "openrouter") { const modelName = config.TEXT_MODEL?.toLowerCase() || ""; if (modelName.includes("claude") || modelName.includes("gemini")) { logger.debug( `[Document Processor] Using ${modelName} with OpenRouter. This configuration supports document caching for improved performance.` ); } } } else { logger.info("[Document Processor] Contextual Knowledge is DISABLED!"); logger.info("[Document Processor] This means documents will NOT be enriched with context."); if (assumePluginOpenAI) { logger.info( "[Document Processor] Embeddings will be handled by the runtime (e.g., plugin-openai, plugin-google-genai)." ); } else { logger.info( "[Document Processor] Using configured embedding provider for basic embeddings only." ); } } } async function getProviderRateLimits(runtime) { const config = validateModelConfig(runtime); const rateLimitEnabled = config.RATE_LIMIT_ENABLED; const maxConcurrentRequests = config.MAX_CONCURRENT_REQUESTS; const requestsPerMinute = config.REQUESTS_PER_MINUTE; const tokensPerMinute = config.TOKENS_PER_MINUTE; const batchDelayMs = config.BATCH_DELAY_MS; const primaryProvider = config.TEXT_PROVIDER || config.EMBEDDING_PROVIDER; if (!rateLimitEnabled) { logger.info( `[Document Processor] Rate limiting DISABLED - unlimited throughput mode (concurrent: ${maxConcurrentRequests}, batch delay: ${batchDelayMs}ms)` ); return { maxConcurrentRequests, requestsPerMinute: Number.MAX_SAFE_INTEGER, tokensPerMinute: Number.MAX_SAFE_INTEGER, provider: primaryProvider || "unlimited", rateLimitEnabled: false, batchDelayMs }; } logger.debug( `[Document Processor] Rate limiting for ${primaryProvider}: ${requestsPerMinute} RPM, ${tokensPerMinute} TPM, ${maxConcurrentRequests} concurrent, ${batchDelayMs}ms batch delay` ); return { maxConcurrentRequests, requestsPerMinute, tokensPerMinute, provider: primaryProvider || "unknown", rateLimitEnabled: true, batchDelayMs }; } // src/ctx-embeddings.ts var DEFAULT_CHUNK_TOKEN_SIZE = 500; var DEFAULT_CHUNK_OVERLAP_TOKENS = 100; var DEFAULT_CHARS_PER_TOKEN = 3.5; var CONTEXT_TARGETS = { DEFAULT: { MIN_TOKENS: 60, MAX_TOKENS: 120 }, PDF: { MIN_TOKENS: 80, MAX_TOKENS: 150 }, MATH_PDF: { MIN_TOKENS: 100, MAX_TOKENS: 180 }, CODE: { MIN_TOKENS: 100, MAX_TOKENS: 200 }, TECHNICAL: { MIN_TOKENS: 80, MAX_TOKENS: 160 } }; var SYSTEM_PROMPTS = { DEFAULT: "You are a precision text augmentation tool. Your task is to expand a given text chunk with its direct context from a larger document. You must: 1) Keep the original chunk intact; 2) Add critical context from surrounding text; 3) Never summarize or rephrase the original chunk; 4) Create contextually rich output for improved semantic retrieval.", CODE: "You are a precision code augmentation tool. Your task is to expand a given code chunk with necessary context from the larger codebase. You must: 1) Keep the original code chunk intact with exact syntax and indentation; 2) Add relevant imports, function signatures, or class definitions; 3) Include critical surrounding code context; 4) Create contextually rich output that maintains correct syntax.", PDF: "You are a precision document augmentation tool. Your task is to expand a given PDF text chunk with its direct context from the larger document. You must: 1) Keep the original chunk intact; 2) Add section headings, references, or figure captions; 3) Include text that immediately precedes and follows the chunk; 4) Create contextually rich output that maintains the document's original structure.", MATH_PDF: "You are a precision mathematical content augmentation tool. Your task is to expand a given mathematical text chunk with essential context. You must: 1) Keep original mathematical notations and expressions exactly as they appear; 2) Add relevant definitions, theorems, or equations from elsewhere in the document; 3) Preserve all LaTeX or mathematical formatting; 4) Create contextually rich output for improved mathematical comprehension.", TECHNICAL: "You are a precision technical documentation augmentation tool. Your task is to expand a technical document chunk with critical context. You must: 1) Keep the original chunk intact including all technical terminology; 2) Add relevant configuration examples, parameter definitions, or API references; 3) Include any prerequisite information; 4) Create contextually rich output that maintains technical accuracy." }; var CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE = ` <document> {doc_content} </document> Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. Follow these guidelines: 1. Identify the document's main topic and key information relevant to understanding this chunk 2. Include 2-3 sentences before the chunk that provide essential context 3. Include 2-3 sentences after the chunk that complete thoughts or provide resolution 4. For technical documents, include any definitions or explanations of terms used in the chunk 5. For narrative content, include character or setting information needed to understand the chunk 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. Do not use phrases like "this chunk discusses" - directly present the context 8. The total length should be between {min_tokens} and {max_tokens} tokens 9. Format the response as a single coherent paragraph Provide ONLY the enriched chunk text in your response:`; var CACHED_CHUNK_PROMPT_TEMPLATE = ` Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. Follow these guidelines: 1. Identify the document's main topic and key information relevant to understanding this chunk 2. Include 2-3 sentences before the chunk that provide essential context 3. Include 2-3 sentences after the chunk that complete thoughts or provide resolution 4. For technical documents, include any definitions or explanations of terms used in the chunk 5. For narrative content, include character or setting information needed to understand the chunk 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. Do not use phrases like "this chunk discusses" - directly present the context 8. The total length should be between {min_tokens} and {max_tokens} tokens 9. Format the response as a single coherent paragraph Provide ONLY the enriched chunk text in your response:`; var CACHED_CODE_CHUNK_PROMPT_TEMPLATE = ` Here is the chunk of code we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this code chunk by adding critical surrounding context. Follow these guidelines: 1. Preserve ALL code syntax, indentation, and comments exactly as they appear 2. Include any import statements, function definitions, or class declarations that this code depends on 3. Add necessary type definitions or interfaces that are referenced in this chunk 4. Include any crucial comments from elsewhere in the document that explain this code 5. If there are key variable declarations or initializations earlier in the document, include those 6. Keep the original chunk COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Do NOT include implementation details for functions that are only called but not defined in this chunk Provide ONLY the enriched code chunk in your response:`; var CACHED_MATH_PDF_PROMPT_TEMPLATE = ` Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. This document contains mathematical content that requires special handling. Follow these guidelines: 1. Preserve ALL mathematical notation exactly as it appears in the chunk 2. Include any defining equations, variables, or parameters mentioned earlier in the document that relate to this chunk 3. Add section/subsection names or figure references if they help situate the chunk 4. If variables or symbols are defined elsewhere in the document, include these definitions 5. If mathematical expressions appear corrupted, try to infer their meaning from context 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Format the response as a coherent mathematical explanation Provide ONLY the enriched chunk text in your response:`; var CACHED_TECHNICAL_PROMPT_TEMPLATE = ` Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. This appears to be technical documentation that requires special handling. Follow these guidelines: 1. Preserve ALL technical terminology, product names, and version numbers exactly as they appear 2. Include any prerequisite information or requirements mentioned earlier in the document 3. Add section/subsection headings or navigation path to situate this chunk within the document structure 4. Include any definitions of technical terms, acronyms, or jargon used in this chunk 5. If this chunk references specific configurations, include relevant parameter explanations 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Format the response maintaining any hierarchical structure present in the original Provide ONLY the enriched chunk text in your response:`; var MATH_PDF_PROMPT_TEMPLATE = ` <document> {doc_content} </document> Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. This document contains mathematical content that requires special handling. Follow these guidelines: 1. Preserve ALL mathematical notation exactly as it appears in the chunk 2. Include any defining equations, variables, or parameters mentioned earlier in the document that relate to this chunk 3. Add section/subsection names or figure references if they help situate the chunk 4. If variables or symbols are defined elsewhere in the document, include these definitions 5. If mathematical expressions appear corrupted, try to infer their meaning from context 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Format the response as a coherent mathematical explanation Provide ONLY the enriched chunk text in your response:`; var CODE_PROMPT_TEMPLATE = ` <document> {doc_content} </document> Here is the chunk of code we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this code chunk by adding critical surrounding context. Follow these guidelines: 1. Preserve ALL code syntax, indentation, and comments exactly as they appear 2. Include any import statements, function definitions, or class declarations that this code depends on 3. Add necessary type definitions or interfaces that are referenced in this chunk 4. Include any crucial comments from elsewhere in the document that explain this code 5. If there are key variable declarations or initializations earlier in the document, include those 6. Keep the original chunk COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Do NOT include implementation details for functions that are only called but not defined in this chunk Provide ONLY the enriched code chunk in your response:`; var TECHNICAL_PROMPT_TEMPLATE = ` <document> {doc_content} </document> Here is the chunk we want to situate within the whole document: <chunk> {chunk_content} </chunk> Create an enriched version of this chunk by adding critical surrounding context. This appears to be technical documentation that requires special handling. Follow these guidelines: 1. Preserve ALL technical terminology, product names, and version numbers exactly as they appear 2. Include any prerequisite information or requirements mentioned earlier in the document 3. Add section/subsection headings or navigation path to situate this chunk within the document structure 4. Include any definitions of technical terms, acronyms, or jargon used in this chunk 5. If this chunk references specific configurations, include relevant parameter explanations 6. Keep the original chunk text COMPLETELY INTACT and UNCHANGED in your response 7. The total length should be between {min_tokens} and {max_tokens} tokens 8. Format the response maintaining any hierarchical structure present in the original Provide ONLY the enriched chunk text in your response:`; function getContextualizationPrompt(docContent, chunkContent, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS, promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE) { if (!docContent || !chunkContent) { console.warn("Document content or chunk content is missing for contextualization."); return "Error: Document or chunk content missing."; } const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN); if (chunkTokens > maxTokens * 0.7) { maxTokens = Math.ceil(chunkTokens * 1.3); minTokens = chunkTokens; } return promptTemplate.replace("{doc_content}", docContent).replace("{chunk_content}", chunkContent).replace("{min_tokens}", minTokens.toString()).replace("{max_tokens}", maxTokens.toString()); } function getCachingContextualizationPrompt(chunkContent, contentType, minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS, maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS) { if (!chunkContent) { console.warn("Chunk content is missing for contextualization."); return { prompt: "Error: Chunk content missing.", systemPrompt: SYSTEM_PROMPTS.DEFAULT }; } const chunkTokens = Math.ceil(chunkContent.length / DEFAULT_CHARS_PER_TOKEN); if (chunkTokens > maxTokens * 0.7) { maxTokens = Math.ceil(chunkTokens * 1.3); minTokens = chunkTokens; } let promptTemplate = CACHED_CHUNK_PROMPT_TEMPLATE; let systemPrompt = SYSTEM_PROMPTS.DEFAULT; if (contentType) { if (contentType.includes("javascript") || contentType.includes("typescript") || contentType.includes("python") || contentType.includes("java") || contentType.includes("c++") || contentType.includes("code")) { promptTemplate = CACHED_CODE_CHUNK_PROMPT_TEMPLATE; systemPrompt = SYSTEM_PROMPTS.CODE; } else if (contentType.includes("pdf")) { if (containsMathematicalContent(chunkContent)) { promptTemplate = CACHED_MATH_PDF_PROMPT_TEMPLATE; systemPrompt = SYSTEM_PROMPTS.MATH_PDF; } else { systemPrompt = SYSTEM_PROMPTS.PDF; } } else if (contentType.includes("markdown") || contentType.includes("text/html") || isTechnicalDocumentation(chunkContent)) { promptTemplate = CACHED_TECHNICAL_PROMPT_TEMPLATE; systemPrompt = SYSTEM_PROMPTS.TECHNICAL; } } const formattedPrompt = promptTemplate.replace("{chunk_content}", chunkContent).replace("{min_tokens}", minTokens.toString()).replace("{max_tokens}", maxTokens.toString()); return { prompt: formattedPrompt, systemPrompt }; } function getPromptForMimeType(mimeType, docContent, chunkContent) { let minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS; let maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS; let promptTemplate = CONTEXTUAL_CHUNK_ENRICHMENT_PROMPT_TEMPLATE; if (mimeType.includes("pdf")) { if (containsMathematicalContent(docContent)) { minTokens = CONTEXT_TARGETS.MATH_PDF.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.MATH_PDF.MAX_TOKENS; promptTemplate = MATH_PDF_PROMPT_TEMPLATE; console.debug("Using mathematical PDF prompt template"); } else { minTokens = CONTEXT_TARGETS.PDF.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.PDF.MAX_TOKENS; console.debug("Using standard PDF settings"); } } else if (mimeType.includes("javascript") || mimeType.includes("typescript") || mimeType.includes("python") || mimeType.includes("java") || mimeType.includes("c++") || mimeType.includes("code")) { minTokens = CONTEXT_TARGETS.CODE.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.CODE.MAX_TOKENS; promptTemplate = CODE_PROMPT_TEMPLATE; console.debug("Using code prompt template"); } else if (isTechnicalDocumentation(docContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) { minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS; promptTemplate = TECHNICAL_PROMPT_TEMPLATE; } return getContextualizationPrompt(docContent, chunkContent, minTokens, maxTokens, promptTemplate); } function getCachingPromptForMimeType(mimeType, chunkContent) { let minTokens = CONTEXT_TARGETS.DEFAULT.MIN_TOKENS; let maxTokens = CONTEXT_TARGETS.DEFAULT.MAX_TOKENS; if (mimeType.includes("pdf")) { if (containsMathematicalContent(chunkContent)) { minTokens = CONTEXT_TARGETS.MATH_PDF.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.MATH_PDF.MAX_TOKENS; } else { minTokens = CONTEXT_TARGETS.PDF.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.PDF.MAX_TOKENS; } } else if (mimeType.includes("javascript") || mimeType.includes("typescript") || mimeType.includes("python") || mimeType.includes("java") || mimeType.includes("c++") || mimeType.includes("code")) { minTokens = CONTEXT_TARGETS.CODE.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.CODE.MAX_TOKENS; } else if (isTechnicalDocumentation(chunkContent) || mimeType.includes("markdown") || mimeType.includes("text/html")) { minTokens = CONTEXT_TARGETS.TECHNICAL.MIN_TOKENS; maxTokens = CONTEXT_TARGETS.TECHNICAL.MAX_TOKENS; } return getCachingContextualizationPrompt(chunkContent, mimeType, minTokens, maxTokens); } function containsMathematicalContent(content) { const latexMathPatterns = [ /\$\$.+?\$\$/s, // Display math: $$ ... $$ /\$.+?\$/g, // Inline math: $ ... $ /\\begin\{equation\}/, // LaTeX equation environment /\\begin\{align\}/, // LaTeX align environment /\\sum_/, // Summation /\\int/, // Integral /\\frac\{/, // Fraction /\\sqrt\{/, // Square root /\\alpha|\\beta|\\gamma|\\delta|\\theta|\\lambda|\\sigma/, // Greek letters /\\nabla|\\partial/ // Differential operators ]; const generalMathPatterns = [ /[≠≤≥±∞∫∂∑∏√∈∉⊆⊇⊂⊃∪∩]/, // Mathematical symbols /\b[a-zA-Z]\^[0-9]/, // Simple exponents (e.g., x^2) /\(\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\)/, // Coordinates /\b[xyz]\s*=\s*-?\d+(\.\d+)?/, // Simple equations /\[\s*-?\d+(\.\d+)?\s*,\s*-?\d+(\.\d+)?\s*\]/, // Vectors/matrices /\b\d+\s*×\s*\d+/ // Dimensions with × symbol ]; for (const pattern of latexMathPatterns) { if (pattern.test(content)) { return true; } } for (const pattern of generalMathPatterns) { if (pattern.test(content)) { return true; } } const mathKeywords = [ "theorem", "lemma", "proof", "equation", "function", "derivative", "integral", "matrix", "vector", "algorithm", "constraint", "coefficient" ]; const contentLower = content.toLowerCase(); const mathKeywordCount = mathKeywords.filter((keyword) => contentLower.includes(keyword)).length; return mathKeywordCount >= 2; } function isTechnicalDocumentation(content) { const technicalPatterns = [ /\b(version|v)\s*\d+\.\d+(\.\d+)?/i, // Version numbers /\b(api|sdk|cli)\b/i, // Technical acronyms /\b(http|https|ftp):\/\//i, // URLs /\b(GET|POST|PUT|DELETE)\b/, // HTTP methods /<\/?[a-z][\s\S]*>/i, // HTML/XML tags /\bREADME\b|\bCHANGELOG\b/i, // Common doc file names /\b(config|configuration)\b/i, // Configuration references /\b(parameter|param|argument|arg)\b/i // Parameter references ]; const docHeadings = [ /\b(Introduction|Overview|Getting Started|Installation|Usage|API Reference|Troubleshooting)\b/i ]; for (const pattern of [...technicalPatterns, ...docHeadings]) { if (pattern.test(content)) { return true; } } const listPatterns = [ /\d+\.\s.+\n\d+\.\s.+/, // Numbered lists /•\s.+\n•\s.+/, // Bullet points with • /\*\s.+\n\*\s.+/, // Bullet points with * /-\s.+\n-\s.+/ // Bullet points with - ]; for (const pattern of listPatterns) { if (pattern.test(content)) { return true; } } return false; } function getChunkWithContext(chunkContent, generatedContext) { if (!generatedContext || generatedContext.trim() === "") { console.warn("Generated context is empty. Falling back to original chunk content."); return chunkContent; } return generatedContext.trim(); } // src/llm.ts import { generateText as aiGenerateText, embed } from "ai"; import { createOpenAI } from "@ai-sdk/openai"; import { createAnthropic } from "@ai-sdk/anthropic"; import { createOpenRouter } from "@openrouter/ai-sdk-provider"; import { google } from "@ai-sdk/google"; import { logger as logger2 } from "@elizaos/core"; async function generateText(runtime, prompt, system, overrideConfig) { const config = validateModelConfig(runtime); const provider = overrideConfig?.provider || config.TEXT_PROVIDER; const modelName = overrideConfig?.modelName || config.TEXT_MODEL; const maxTokens = overrideConfig?.maxTokens || config.MAX_OUTPUT_TOKENS; const autoCacheContextualRetrieval = overrideConfig?.autoCacheContextualRetrieval !== false; try { switch (provider) { case "anthropic": return await generateAnthropicText(config, prompt, system, modelName, maxTokens); case "openai": return await generateOpenAIText(config, prompt, system, modelName, maxTokens); case "openrouter": return await generateOpenRouterText( config, prompt, system, modelName, maxTokens, overrideConfig?.cacheDocument, overrideConfig?.cacheOptions, autoCacheContextualRetrieval ); case "google": return await generateGoogleText(prompt, system, modelName, maxTokens, config); default: throw new Error(`Unsupported text provider: ${provider}`); } } catch (error) { logger2.error({ error }, `[Document Processor] ${provider} ${modelName} error`); throw error; } } async function generateAnthropicText(config, prompt, system, modelName, maxTokens) { const anthropic = createAnthropic({ apiKey: config.ANTHROPIC_API_KEY, baseURL: config.ANTHROPIC_BASE_URL }); const modelInstance = anthropic(modelName); const maxRetries = 3; for (let attempt = 0; attempt < maxRetries; attempt++) { try { const result = await aiGenerateText({ model: modelInstance, prompt, system, temperature: 0.3, maxOutputTokens: maxTokens }); const totalTokens = (result.usage.inputTokens || 0) + (result.usage.outputTokens || 0); logger2.debug( `[Document Processor] ${modelName}: ${totalTokens} tokens (${result.usage.inputTokens || 0}\u2192${result.usage.outputTokens || 0})` ); return result; } catch (error) { const isRateLimit = error?.status === 429 || error?.message?.includes("rate limit") || error?.message?.includes("429"); if (isRateLimit && attempt < maxRetries - 1) { const delay = Math.pow(2, attempt + 1) * 1e3; logger2.warn( `[Document Processor] Rate limit hit (${modelName}): attempt ${attempt + 1}/${maxRetries}, retrying in ${Math.round(delay / 1e3)}s` ); await new Promise((resolve2) => setTimeout(resolve2, delay)); continue; } throw error; } } throw new Error("Max retries exceeded for Anthropic text generation"); } async function generateOpenAIText(config, prompt, system, modelName, maxTokens) { const openai = createOpenAI({ apiKey: config.OPENAI_API_KEY, baseURL: config.OPENAI_BASE_URL }); const modelInstance = openai.chat(modelName); const result = await aiGenerateText({ model: modelInstance, prompt, system, temperature: 0.3, maxOutputTokens: maxTokens }); const totalTokens = (result.usage.inputTokens || 0) + (result.usage.outputTokens || 0); logger2.debug( `[Document Processor] OpenAI ${modelName}: ${totalTokens} tokens (${result.usage.inputTokens || 0}\u2192${result.usage.outputTokens || 0})` ); return result; } async function generateGoogleText(prompt, system, modelName, maxTokens, config) { const googleProvider = google; if (config.GOOGLE_API_KEY) { process.env.GOOGLE_GENERATIVE_AI_API_KEY = config.GOOGLE_API_KEY; } const modelInstance = googleProvider(modelName); const result = await aiGenerateText({ model: modelInstance, prompt, system, temperature: 0.3, maxOutputTokens: maxTokens }); const totalTokens = (result.usage.inputTokens || 0) + (result.usage.outputTokens || 0); logger2.debug( `[Document Processor] Google ${modelName}: ${totalTokens} tokens (${result.usage.inputTokens || 0}\u2192${result.usage.outputTokens || 0})` ); return result; } async function generateOpenRouterText(config, prompt, system, modelName, maxTokens, cacheDocument, _cacheOptions, autoCacheContextualRetrieval = true) { const openrouter = createOpenRouter({ apiKey: config.OPENROUTER_API_KEY, baseURL: config.OPENROUTER_BASE_URL }); const modelInstance = openrouter.chat(modelName); const isClaudeModel = modelName.toLowerCase().includes("claude"); const isGeminiModel = modelName.toLowerCase().includes("gemini"); const isGemini25Model = modelName.toLowerCase().includes("gemini-2.5"); const supportsCaching = isClaudeModel || isGeminiModel; let documentForCaching = cacheDocument; if (!documentForCaching && autoCacheContextualRetrieval && supportsCaching) { const docMatch = prompt.match(/<document>([\s\S]*?)<\/document>/); if (docMatch && docMatch[1]) { documentForCaching = docMatch[1].trim(); logger2.debug( `[Document Processor] Auto-detected document for caching (${documentForCaching.length} chars)` ); } } if (documentForCaching && supportsCaching) { let promptText = prompt; if (promptText.includes("<document>")) { promptText = promptText.replace(/<document>[\s\S]*?<\/document>/, "").trim(); } if (isClaudeModel) { return await generateClaudeWithCaching( promptText, system, modelInstance, modelName, maxTokens, documentForCaching ); } else if (isGeminiModel) { return await generateGeminiWithCaching( promptText, system, modelInstance, modelName, maxTokens, documentForCaching, isGemini25Model ); } } logger2.debug("[Document Processor] Using standard request without caching"); return await generateStandardOpenRouterText(prompt, system, modelInstance, modelName, maxTokens); } async function generateClaudeWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching) { logger2.debug(`[Document Processor] Using explicit prompt caching with Claude ${modelName}`); const messages = [ // System message with cached document (if system is provided) system ? { role: "system", content: [ { type: "text", text: system }, { type: "text", text: documentForCaching, cache_control: { type: "ephemeral" } } ] } : ( // User message with cached document (if no system message) { role: "user", content: [ { type: "text", text: "Document for context:" }, { type: "text", text: documentForCaching, cache_control: { type: "ephemeral" } }, { type: "text", text: promptText } ] } ), // Only add user message if system was provided (otherwise we included user above) system ? { role: "user", content: [ { type: "text", text: promptText } ] } : null ].filter(Boolean); logger2.debug("[Document Processor] Using Claude-specific caching structure"); const result = await aiGenerateText({ model: modelInstance, messages, temperature: 0.3, maxOutputTokens: maxTokens, providerOptions: { openrouter: { usage: { include: true } } } }); logCacheMetrics(result); const totalTokens = (result.usage.inputTokens || 0) + (result.usage.outputTokens || 0); logger2.debug( `[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.inputTokens || 0}\u2192${result.usage.outputTokens || 0})` ); return result; } async function generateGeminiWithCaching(promptText, system, modelInstance, modelName, maxTokens, documentForCaching, isGemini25Model) { const usingImplicitCaching = isGemini25Model; const estimatedDocTokens = Math.ceil(documentForCaching.length / 4); const minTokensForImplicitCache = modelName.toLowerCase().includes("flash") ? 1028 : 2048; const likelyTriggersCaching = estimatedDocTokens >= minTokensForImplicitCache; if (usingImplicitCaching) { logger2.debug(`[Document Processor] Using Gemini 2.5 implicit caching with ${modelName}`); logger2.debug( `[Document Processor] Gemini 2.5 models automatically cache large prompts (no cache_control needed)` ); if (likelyTriggersCaching) { logger2.debug( `[Document Processor] Document ~${estimatedDocTokens} tokens exceeds ${minTokensForImplicitCache} token threshold for caching` ); } else { logger2.debug( `[Document Processor] Document ~${estimatedDocTokens} tokens may not meet ${minTokensForImplicitCache} token threshold for caching` ); } } else { logger2.debug(`[Document Processor] Using standard prompt format with Gemini ${modelName}`); logger2.debug( `[Document Processor] Note: Only Gemini 2.5 models support automatic implicit caching` ); } const geminiSystemPrefix = system ? `${system} ` : ""; const geminiPrompt = `${geminiSystemPrefix}${documentForCaching} ${promptText}`; const result = await aiGenerateText({ model: modelInstance, prompt: geminiPrompt, temperature: 0.3, maxOutputTokens: maxTokens, providerOptions: { openrouter: { usage: { include: true // Include usage info to see cache metrics } } } }); logCacheMetrics(result); const totalTokens = (result.usage.inputTokens || 0) + (result.usage.outputTokens || 0); const cachingType = usingImplicitCaching ? "implicit" : "standard"; logger2.debug( `[Document Processor] OpenRouter ${modelName} (${cachingType} caching): ${totalTokens} tokens (${result.usage.inputTokens || 0}\u2192${result.usage.outputTokens || 0})` ); return result; } async function generateStandardOpenRouterText(prompt, system, modelInstance, modelName, maxTokens) { const result = await aiGenerateText({ model: modelInstance, prompt, system, temperature: 0.3, maxOutputTokens: maxTokens, providerOptions: { openrouter: { usage: { include: true // Include usage info to see cache metrics } } } }); const totalTokens = (result.usage.inputTokens || 0) + (result.usage.outputTokens || 0); logger2.debug( `[Document Processor] OpenRouter ${modelName}: ${totalTokens} tokens (${result.usage.inputTokens || 0}\u2192${result.usage.outputTokens || 0})` ); return result; } function logCacheMetrics(result) { if (result.usage && result.usage.cacheTokens) { logger2.debug( `[Document Processor] Cache metrics - tokens: ${result.usage.cacheTokens}, discount: ${result.usage.cacheDiscount}` ); } } // src/utils.ts import { Buffer as Buffer2 } from "buffer"; import * as mammoth from "mammoth"; import { logger as logger3 } from "@elizaos/core"; import { extractText } from "unpdf"; import { createHash as createHash2 } from "crypto"; var PLAIN_TEXT_CONTENT_TYPES = [ "application/typescript", "text/typescript", "text/x-python", "application/x-python-code", "application/yaml", "text/yaml", "application/x-yaml", "application/json", "text/markdown", "text/csv" ]; var MAX_FALLBACK_SIZE_BYTES = 5 * 1024 * 1024; var BINARY_CHECK_BYTES = 1024; async function extractTextFromFileBuffer(fileBuffer, contentType, originalFilename) { const lowerContentType = contentType.toLowerCase(); logger3.debug( `[TextUtil] Attempting to extract text from ${originalFilename} (type: ${contentType})` ); if (lowerContentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") { logger3.debug(`[TextUtil] Extracting text from DOCX ${originalFilename} via mammoth.`); try { const result = await mammoth.extractRawText({ buffer: fileBuffer }); logger3.debug( `[TextUtil] DOCX text extraction complete for ${originalFilename}. Text length: ${result.value.length}` ); return result.value; } catch (docxError) { const errorMsg = `[TextUtil] Failed to parse DOCX file ${originalFilename}: ${docxError.message}`; logger3.error(errorMsg, docxError.stack); throw new Error(errorMsg); } } else if (lowerContentType === "application/msword" || originalFilename.toLowerCase().endsWith(".doc")) { logger3.debug(`[TextUtil] Handling Microsoft Word .doc file: ${originalFilename}`); return `[Microsoft Word Document: ${originalFilename}] This document was indexed for search but cannot be displayed directly in the browser. The original document content is preserved for retrieval purposes.`; } else if (lowerContentType.startsWith("text/") || PLAIN_TEXT_CONTENT_TYPES.includes(lowerContentType)) { logger3.debug( `[TextUtil] Extracting text from plain text compatible file ${originalFilename} (type: ${contentType})` ); return fileBuffer.toString("utf-8"); } else { logger3.warn( `[TextUtil] Unsupported content type: "${contentType}" for ${originalFilename}. Attempting fallback to plain text.` ); if (fileBuffer.length > MAX_FALLBACK_SIZE_BYTES) { const sizeErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) exceeds maximum size for fallback (${MAX_FALLBACK_SIZE_BYTES} bytes). Cannot process as plain text.`; logger3.error(sizeErrorMsg); throw new Error(sizeErrorMsg); } const initialBytes = fileBuffer.subarray(0, Math.min(fileBuffer.length, BINARY_CHECK_BYTES)); if (initialBytes.includes(0)) { const binaryHeuristicMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) appears to be binary based on initial byte check. Cannot process as plain text.`; logger3.error(binaryHeuristicMsg); throw new Error(binaryHeuristicMsg); } try { const textContent = fileBuffer.toString("utf-8"); if (textContent.includes("\uFFFD")) { const binaryErrorMsg = `[TextUtil] File ${originalFilename} (type: ${contentType}) seems to be binary or has encoding issues after fallback to plain text (detected \uFFFD).`; logger3.error(binaryErrorMsg); throw new Error(binaryErrorMsg); } logger3.debug( `[TextUtil] Successfully processed unknown type ${contentType} as plain text after fallback for ${originalFilename}.` ); return textContent; } catch (fallbackError) { const finalErrorMsg = `[TextUtil] Unsupported content type: ${contentType} for ${originalFilename}. Fallback to plain text also failed or indicated binary content.`; logger3.error(finalErrorMsg, fallbackError.message ? fallbackError.stack : void 0); throw new Error(finalErrorMsg); } } } async function convertPdfToTextFromBuffer(pdfBuffer, filename) {