UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

1,227 lines 72.9 kB
/** * Message Builder Utility * Centralized logic for building message arrays from TextGenerationOptions * Enhanced with multimodal support for images */ import { existsSync, readFileSync, statSync } from "fs"; import { getGlobalDispatcher, interceptors, request } from "undici"; import { MultimodalLogger, ProviderImageAdapter, } from "../adapters/providerImageAdapter.js"; import { CONVERSATION_INSTRUCTIONS, STRUCTURED_OUTPUT_INSTRUCTIONS, } from "../config/conversationMemory.js"; import { getAvailableInputTokens } from "../constants/contextWindows.js"; import { enforceAggregateFileBudget, FILE_READ_BUDGET_PERCENT, } from "../context/fileTokenBudget.js"; import { SIZE_TIER_THRESHOLDS } from "../types/index.js"; import { tracers, ATTR, withSpan } from "../telemetry/index.js"; import { FileDetector } from "./fileDetector.js"; import { getImageCache } from "./imageCache.js"; import { logger } from "./logger.js"; import { PDFImageConverter, PDFProcessor } from "./pdfProcessor.js"; import { urlDownloadRateLimiter } from "./rateLimiter.js"; import { estimateTokens } from "./tokenEstimation.js"; // --------------------------------------------------------------------------- // SDK-7: Lightweight file-type inference helpers for budget estimation // These avoid calling the full FileDetector pipeline — they only need to // classify files into broad categories (video, audio, image, etc.) so // estimatePostProcessingTokens() can use type-aware estimates. 
// ---------------------------------------------------------------------------
/**
 * Extension → file type mapping for budget estimation.
 * Keys are lower-case extensions without the leading dot; values are the
 * broad category strings returned by inferFileTypeFromExtension().
 */
const EXTENSION_TYPE_MAP = {
    // Video
    mp4: "video",
    mkv: "video",
    mov: "video",
    avi: "video",
    webm: "video",
    wmv: "video",
    flv: "video",
    m4v: "video",
    // Audio
    mp3: "audio",
    wav: "audio",
    ogg: "audio",
    flac: "audio",
    m4a: "audio",
    aac: "audio",
    wma: "audio",
    opus: "audio",
    // Image
    jpg: "image",
    jpeg: "image",
    png: "image",
    gif: "image",
    webp: "image",
    bmp: "image",
    tiff: "image",
    tif: "image",
    avif: "image",
    // Archive
    zip: "archive",
    tar: "archive",
    gz: "archive",
    tgz: "archive",
    rar: "archive",
    "7z": "archive",
    jar: "archive",
    // Documents
    xlsx: "xlsx",
    xls: "xlsx",
    ods: "xlsx",
    docx: "docx",
    doc: "docx",
    odt: "docx",
    rtf: "docx",
    pptx: "pptx",
    ppt: "pptx",
    odp: "pptx",
    // PDF
    pdf: "pdf",
    // SVG
    svg: "svg",
    // CSV
    csv: "csv",
    tsv: "csv",
};
/**
 * Infer file type from extension in a file path or URL.
 *
 * @param {string} filePath - File path or URL; any query string / fragment is ignored.
 * @returns {string | undefined} The mapped category, or undefined when there is
 *   no extension or the extension is not a key of EXTENSION_TYPE_MAP.
 */
function inferFileTypeFromExtension(filePath) {
    // Strip query string / fragment for URLs
    const cleaned = filePath.split("?")[0].split("#")[0];
    const lastDot = cleaned.lastIndexOf(".");
    if (lastDot === -1) {
        return undefined;
    }
    const ext = cleaned.slice(lastDot + 1).toLowerCase();
    // Own-property check: a bare `EXTENSION_TYPE_MAP[ext]` would also resolve
    // inherited keys such as "constructor" or "__proto__" and return a
    // function/object instead of undefined.
    return Object.prototype.hasOwnProperty.call(EXTENSION_TYPE_MAP, ext)
        ? EXTENSION_TYPE_MAP[ext]
        : undefined;
}
/**
 * Check whether `buf` holds the byte sequence `signature` starting at `offset`.
 * Returns false instead of reading past the end when the buffer is too short.
 */
function bufferHasSignatureAt(buf, offset, signature) {
    if (buf.length < offset + signature.length) {
        return false;
    }
    for (let i = 0; i < signature.length; i++) {
        if (buf[offset + i] !== signature[i]) {
            return false;
        }
    }
    return true;
}
/** ISO-BMFF major brands (bytes 8-11 after "ftyp") that denote still/animated images. */
const ISO_BMFF_IMAGE_BRANDS = new Set(["avif", "avis", "heic", "heix", "mif1", "msf1"]);
/** ISO-BMFF major brands that denote audio-only containers. */
const ISO_BMFF_AUDIO_BRANDS = new Set(["M4A ", "M4B "]);
/**
 * Infer file type from the first few magic bytes of a Buffer.
 * Only checks the most common binary types — text types default to undefined.
 *
 * @param {Buffer} buf - Raw file bytes.
 * @returns {"image" | "pdf" | "video" | "audio" | "archive" | undefined}
 *   Broad category, or undefined when fewer than 4 bytes are available or no
 *   signature matches.
 */
function inferFileTypeFromBuffer(buf) {
    if (buf.length < 4) {
        return undefined;
    }
    const has = (offset, ...bytes) => bufferHasSignatureAt(buf, offset, bytes);
    const isRiff = has(0, 0x52, 0x49, 0x46, 0x46);
    // PNG
    if (has(0, 0x89, 0x50, 0x4e, 0x47)) {
        return "image";
    }
    // JPEG
    if (has(0, 0xff, 0xd8, 0xff)) {
        return "image";
    }
    // GIF
    if (has(0, 0x47, 0x49, 0x46)) {
        return "image";
    }
    // WebP (RIFF + WEBP)
    if (isRiff && has(8, 0x57, 0x45, 0x42, 0x50)) {
        return "image";
    }
    // PDF
    if (has(0, 0x25, 0x50, 0x44, 0x46)) {
        return "pdf";
    }
    // ISO-BMFF ("ftyp" at offset 4): MP4/MOV, but also AVIF/HEIC images and
    // M4A audio. The major brand at bytes 8-11 tells them apart, which keeps
    // buffer inference in line with the extension map (avif → image, m4a → audio).
    if (has(4, 0x66, 0x74, 0x79, 0x70)) {
        if (buf.length >= 12) {
            const brand = String.fromCharCode(buf[8], buf[9], buf[10], buf[11]);
            if (ISO_BMFF_IMAGE_BRANDS.has(brand)) {
                return "image";
            }
            if (ISO_BMFF_AUDIO_BRANDS.has(brand)) {
                return "audio";
            }
        }
        return "video";
    }
    // MKV/WebM (EBML)
    if (has(0, 0x1a, 0x45, 0xdf, 0xa3)) {
        return "video";
    }
    // AVI (RIFF + "AVI ")
    if (isRiff && has(8, 0x41, 0x56, 0x49, 0x20)) {
        return "video";
    }
    // WAV (RIFF + WAVE)
    if (isRiff && has(8, 0x57, 0x41, 0x56, 0x45)) {
        return "audio";
    }
    // MP3 (ID3 tag)
    if (has(0, 0x49, 0x44, 0x33)) {
        return "audio";
    }
    // FLAC
    if (has(0, 0x66, 0x4c, 0x61, 0x43)) {
        return "audio";
    }
    // OGG
    if (has(0, 0x4f, 0x67, 0x67, 0x53)) {
        return "audio";
    }
    // ZIP (also .xlsx, .docx, .pptx — but without extension we default to archive)
    if (has(0, 0x50, 0x4b, 0x03, 0x04)) {
        return "archive";
    }
    // GZIP
    if (has(0, 0x1f, 0x8b)) {
        return "archive";
    }
    // RAR
    if (has(0, 0x52, 0x61, 0x72, 0x21)) {
        return "archive";
    }
    return undefined;
}
/**
 * Type guard to check if an image input has alt text.
 * True for non-null, non-Buffer objects that carry a `data` property.
 */
function isImageWithAltText(image) {
    // `typeof null === "object"`, and `"data" in null` throws — exclude null first.
    return (image !== null &&
        typeof image === "object" &&
        !Buffer.isBuffer(image) &&
        "data" in image);
}
/**
 * Extract image data from an image input (handles both simple and alt text formats)
 */
function extractImageData(image) {
    if (isImageWithAltText(image)) {
        return image.data;
    }
    return image;
}
/**
 * Extract alt text from an image input if available
 */
function extractAltText(image) {
    if (isImageWithAltText(image)) {
        return image.altText;
    }
    return undefined;
}
/**
 * Type guard for validating message roles.
 * Accepts exactly "user", "assistant" and "system".
 */
function isValidRole(role) {
    return role === "user" || role === "assistant" || role === "system";
}
/**
 * Type guard for validating content items.
 * - text:  `text` must be a string
 * - image: `image` must be a string; `mimeType` is optional but must be a string if present
 * - file:  `data` must be a Buffer and `mimeType` a string
 */
function isValidContentItem(item) {
    if (!item || typeof item !== "object") {
        return false;
    }
    const contentItem = item;
    if (contentItem.type === "text") {
        return typeof contentItem.text === "string";
    }
    if (contentItem.type === "image") {
        return (typeof contentItem.image === "string" &&
            (contentItem.mimeType === undefined ||
                typeof contentItem.mimeType === "string"));
    }
    if (contentItem.type === "file") {
        return (Buffer.isBuffer(contentItem.data) &&
            typeof contentItem.mimeType === "string");
    }
    return false;
}
/**
 * Safely convert content item to AI SDK content format.
 * Returns null for items that fail isValidContentItem(); `mimeType` is
 * emitted under the `mediaType` key.
 */
function convertContentItem(item) {
    if (!isValidContentItem(item)) {
        return null;
    }
    const contentItem = item;
    if (contentItem.type === "text" && typeof contentItem.text === "string") {
        return { type: "text", text: contentItem.text };
    }
    if (contentItem.type === "image" && typeof contentItem.image === "string") {
        return {
            type: "image",
            image: contentItem.image,
            ...(contentItem.mimeType && { mediaType: contentItem.mimeType }),
        };
    }
    if (contentItem.type === "file" &&
        Buffer.isBuffer(contentItem.data) &&
        contentItem.mimeType) {
        return {
            type: "file",
            data: contentItem.data,
            mediaType: contentItem.mimeType,
        };
    }
    return null;
}
/**
 * Type-safe conversion from MultimodalChatMessage[] to ModelMessage[]
 * Filters out invalid content and ensures strict ModelMessage contract compliance
 *
 * Rules applied per message:
 * - unknown role, non-string/non-array content, or no valid content items → skipped (warned)
 * - string content → passed through unchanged
 * - user + array content → valid items kept as an array
 * - assistant / system + array content → text items joined with " "; skipped when
 *   there is no text, so providers never receive an empty-content message
 *
 * @param {Array<{role: string, content: unknown}>} messages
 * @returns {Array<object>} Converted messages, in input order.
 */
export function convertToModelMessages(messages) {
    const joinText = (items) => items
        .filter((item) => item.type === "text")
        .map((item) => item.text)
        .join(" ");
    return messages
        .map((msg) => {
        // Validate role
        if (!isValidRole(msg.role)) {
            logger.warn("Invalid message role found, skipping", { role: msg.role });
            return null;
        }
        // Handle string content — the role has already been validated
        if (typeof msg.content === "string") {
            return { role: msg.role, content: msg.content };
        }
        // Handle array content (multimodal) - only user messages support full multimodal content
        if (Array.isArray(msg.content)) {
            const validContent = msg.content
                .map(convertContentItem)
                .filter((item) => item !== null);
            // If no valid content items, skip the message
            if (validContent.length === 0) {
                logger.warn("No valid content items found in multimodal message, skipping");
                return null;
            }
            if (msg.role === "user") {
                // User messages support both text and image content
                return { role: "user", content: validContent };
            }
            // Assistant and system messages are text-only: drop images/files and
            // skip the message entirely when nothing textual remains, to avoid
            // sending empty content to providers like Claude.
            const hasText = validContent.some((item) => item.type === "text");
            if (!hasText) {
                return null;
            }
            return { role: msg.role, content: joinText(validContent) };
        }
        // Invalid content type
        logger.warn("Invalid message content type found, skipping", {
            contentType: typeof msg.content,
        });
        return null;
    })
        .filter((msg) => msg !== null);
}
/**
 * Convert ChatMessage to ModelMessage for AI SDK compatibility.
 * Returns null for roles other than user/assistant/system (e.g. tool_call,
 * tool_result), for non-string content, and for blank content.
 */
function toModelMessage(message) {
    // Only include messages with roles supported by AI SDK
    if (!isValidRole(message.role)) {
        return null; // Filter out tool_call and tool_result messages
    }
    // Guard before calling .trim(): non-string content would otherwise throw
    if (typeof message.content !== "string" || message.content.trim() === "") {
        return null;
    }
    return {
        role: message.role,
        content: message.content,
    };
}
/**
 * Format CSV metadata for LLM consumption.
 * Produces `**Metadata**: a | b | c` from whichever of rowCount, columnCount,
 * columnNames and hasEmptyColumns are present, or "" when none are.
 */
function formatCSVMetadata(metadata) {
    const parts = [];
    if (metadata.rowCount !== undefined) {
        parts.push(`${metadata.rowCount} data rows`);
    }
    if (metadata.columnCount !== undefined) {
        parts.push(`${metadata.columnCount} columns`);
    }
    if (metadata.columnNames && metadata.columnNames.length > 0) {
        const columns = metadata.columnNames.join(", ");
        parts.push(`Columns: [${columns}]`);
    }
    if (metadata.hasEmptyColumns) {
        parts.push(`⚠️ Contains empty column names`);
    }
    return parts.length > 0 ? `**Metadata**: ${parts.join(" | ")}` : "";
}
/**
 * Check if structured output mode should be enabled
 * Structured output is used when a schema is provided with json/structured format
 */
function shouldUseStructuredOutput(options) {
    return (!!options.schema &&
        (options.output?.format === "json" ||
            options.output?.format === "structured"));
}
/**
 * Emit a single debug line summarising a composed message array.
 * Nothing is logged unless the logger reports that "debug" is enabled.
 * The summary contains the message count, a per-role tally, the total
 * character count of string contents (non-string content counts as 0) and
 * a chars/4 token estimate — never the message content itself.
 */
function logMessageComposition(messages, requestId) {
    if (!logger.shouldLog("debug")) {
        return;
    }
    const roles = {};
    let totalChars = 0;
    messages.forEach((entry) => {
        if (typeof entry.content === "string") {
            totalChars += entry.content.length;
        }
        roles[entry.role] = (roles[entry.role] || 0) + 1;
    });
    logger.debug("[MessageBuilder] Composed", {
        requestId,
        totalMessages: messages.length,
        roles,
        totalChars,
        estimatedTokens: Math.ceil(totalChars / 4),
    });
}
/**
 * Assemble the provider-facing message list: optional system message,
 * prior conversation turns, then the current user prompt.
 *
 * Works with both option shapes: `prompt` (TextGenerationOptions) and
 * `input.text` (StreamOptions). CSV inputs found in `input.csvFiles` and
 * `input.files` are parsed through FileDetector and prepended to the prompt.
 *
 * @param {object} options - Generation/stream options.
 * @returns {Promise<Array<object>>} Messages in send order.
 */
export async function buildMessagesArray(options) {
    const messages = [];
    const history = options.conversationMessages;
    const hasConversationHistory = history && history.length > 0;
    // System prompt = caller prompt + optional instruction suffixes, each
    // appended to the trimmed prompt built so far.
    const suffixes = [];
    if (hasConversationHistory) {
        suffixes.push(CONVERSATION_INSTRUCTIONS);
    }
    if (shouldUseStructuredOutput(options)) {
        suffixes.push(STRUCTURED_OUTPUT_INSTRUCTIONS);
    }
    let systemPrompt = options.systemPrompt?.trim() || "";
    for (const suffix of suffixes) {
        systemPrompt = `${systemPrompt.trim()}${suffix}`;
    }
    const finalSystemPrompt = systemPrompt.trim();
    if (finalSystemPrompt) {
        messages.push({
            role: "system",
            content: finalSystemPrompt,
            providerOptions: {
                anthropic: { cacheControl: { type: "ephemeral" } },
            },
        });
    }
    // Replay history; toModelMessage() returns null for entries to drop.
    if (hasConversationHistory) {
        for (const chatMessage of history) {
            const converted = toModelMessage(chatMessage);
            if (converted) {
                messages.push(converted);
            }
        }
    }
    // Current prompt: `prompt` wins over `input.text`.
    let currentPrompt;
    if ("prompt" in options && options.prompt) {
        currentPrompt = options.prompt;
    }
    else if ("input" in options && options.input?.text) {
        currentPrompt = options.input.text;
    }
    if ("input" in options && options.input) {
        const input = options.input;
        const csvOptions = "csvOptions" in options ? options.csvOptions : undefined;
        // Heading + optional metadata line + raw CSV body for one parsed file.
        const renderCsvData = (filename, result) => {
            let section = `\n\n## CSV Data from "${filename}":\n`;
            if (result.metadata) {
                const metadataText = formatCSVMetadata(result.metadata);
                if (metadataText) {
                    section += metadataText + `\n\n`;
                }
            }
            section += result.content;
            return section;
        };
        let csvContent = "";
        // Explicit csvFiles: failures are reported inline in the prompt.
        if (input.csvFiles && input.csvFiles.length > 0) {
            for (const [i, csvFile] of input.csvFiles.entries()) {
                const filename = extractFilename(csvFile, i);
                const filePath = typeof csvFile === "string" ? csvFile : filename;
                try {
                    const result = await FileDetector.detectAndProcess(csvFile, {
                        allowedTypes: ["csv"],
                        csvOptions: csvOptions,
                    });
                    // Data first, tool instructions second: the instructions
                    // refer to "the CSV data shown above".
                    let csvSection = renderCsvData(filename, result);
                    csvSection += buildCSVToolInstructions(filePath);
                    csvContent += csvSection;
                    logger.info(`[CSV] ✅ Processed: ${filename}`, result.metadata);
                }
                catch (error) {
                    logger.error(`[CSV] ❌ Failed to process ${filename}:`, error);
                    const reason = error instanceof Error ? error.message : "Unknown error";
                    csvContent += `\n\n## CSV Data Error: Failed to process "${filename}"\nReason: ${reason}`;
                }
            }
        }
        // Unified files: only CSV results are used; anything that fails
        // detection is skipped with a debug log.
        if (input.files && input.files.length > 0) {
            for (const file of input.files) {
                const filename = extractFilename(file);
                try {
                    const result = await FileDetector.detectAndProcess(file, {
                        maxSize: 50 * 1024 * 1024,
                        allowedTypes: ["csv"],
                        csvOptions: csvOptions,
                        mimetypeHint: isFileWithMetadata(file) ? file.mimetype : undefined,
                    });
                    if (result.type !== "csv") {
                        continue;
                    }
                    csvContent += renderCsvData(filename, result);
                    logger.info(`[FileDetector] ✅ CSV: ${filename}`, result.metadata);
                }
                catch (error) {
                    const detail = error instanceof Error ? error.message : String(error);
                    logger.debug(`[FileDetector] Skipped ${filename}: ${detail}`);
                }
            }
        }
        // CSV sections go in front of whatever the user typed.
        if (csvContent) {
            currentPrompt = csvContent + (currentPrompt || "");
        }
    }
    if (currentPrompt?.trim()) {
        messages.push({
            role: "user",
            content: currentPrompt.trim(),
        });
    }
    logMessageComposition(messages, options.context?.requestId);
    return messages;
}
/**
 * Enforce aggregate file budget, excluding files that would exceed the context window.
 * Mutates options.input.files and options.input.text as needed.
 *
 * Size/type used for budgeting, per entry of `options.input.files`:
 * - Buffer: byte length + magic-byte type
 * - string: on-disk size when the path exists (0 if stat fails), otherwise the
 *   string length; type from the extension
 * - object carrying a Buffer in `.buffer`: that buffer's length + magic-byte type
 * - anything else: 0 bytes, unknown type
 *
 * @param {object} options - Request options; `input` is created when missing.
 * @param {string} provider
 * @param {string} model
 */
function enforceFileBudget(options, provider, model) {
    options.input ??= {};
    if (!options.input.files || options.input.files.length === 0) {
        return;
    }
    const availableTokens = getAvailableInputTokens(provider, model);
    const budgetFiles = options.input.files.map((file, idx) => {
        let sizeBytes = 0;
        let fileType;
        if (Buffer.isBuffer(file)) {
            sizeBytes = file.length;
            fileType = inferFileTypeFromBuffer(file);
        }
        else if (typeof file === "string") {
            if (existsSync(file)) {
                try {
                    sizeBytes = statSync(file).size;
                }
                catch {
                    sizeBytes = 0;
                }
            }
            else {
                sizeBytes = file.length;
            }
            fileType = inferFileTypeFromExtension(file);
        }
        else if (file && Buffer.isBuffer(file.buffer)) {
            // Metadata-wrapped buffers must count toward the budget too;
            // treating them as 0 bytes would let them bypass it entirely.
            sizeBytes = file.buffer.length;
            fileType = inferFileTypeFromBuffer(file.buffer);
        }
        return {
            name: typeof file === "string" ? file : `file-${idx}`,
            sizeBytes,
            fileType,
            originalIndex: idx,
        };
    });
    const budgetResult = enforceAggregateFileBudget(budgetFiles.map((f) => ({
        name: f.name,
        sizeBytes: f.sizeBytes,
        fileType: f.fileType,
    })), availableTokens);
    if (budgetResult.excluded.length > 0) {
        // Map included entries back to original positions. Names are not unique
        // (the same path may be listed twice), so keep a queue of indices per
        // name and hand them out one at a time instead of always resolving to
        // the first match.
        const indicesByName = new Map();
        for (const bf of budgetFiles) {
            const queue = indicesByName.get(bf.name);
            if (queue) {
                queue.push(bf.originalIndex);
            }
            else {
                indicesByName.set(bf.name, [bf.originalIndex]);
            }
        }
        const includedIndices = new Set();
        for (const included of budgetResult.included) {
            const nextIndex = indicesByName.get(included.name)?.shift();
            if (nextIndex !== undefined) {
                includedIndices.add(nextIndex);
            }
        }
        options.input.files = options.input.files.filter((_file, idx) => includedIndices.has(idx));
        options.input.text =
            (options.input.text || "") + "\n\n" + budgetResult.notices.join("\n");
        logger.warn(`[FileDetector] Aggregate file budget enforcement: excluded ${budgetResult.excluded.length} file(s)`);
    }
}
/**
 * Result types rendered as a plain `## <heading>: "<filename>"` text section,
 * with the label used in the accompanying info log.
 */
const PLAIN_TEXT_SECTION_KINDS = new Map([
    ["archive", { heading: "Archive File", logLabel: "✅ Archive" }],
    ["xlsx", { heading: "Spreadsheet", logLabel: "✅ Spreadsheet" }],
    ["docx", { heading: "Document", logLabel: "✅ Document" }],
    ["pptx", { heading: "Presentation", logLabel: "✅ Presentation" }],
    ["unknown", { heading: "Attached File", logLabel: "⚠️ Unknown format (metadata extracted)" }],
]);
/**
 * Append a detected file result to options.input based on its type.
 * Handles CSV, SVG, image, PDF, video, audio, archive, xlsx, docx, pptx, text, and unknown types.
 *
 * Text-like results are appended to `options.input.text`; images (including
 * video keyframes / audio cover art in `result.images`) go to
 * `options.input.images`; PDFs go to `options.input.pdfFiles`.
 *
 * @param {object} result - Detection result (`type`, `content`, optional `metadata`, `mimeType`, `images`).
 * @param {*} file - The original file input, used to derive the display name.
 * @param {object} options - Request options; `input` is created when missing.
 */
function appendDetectedFileResult(result, file, options) {
    options.input ??= {};
    const input = options.input;
    const filename = extractFilename(file);
    // Treat a missing text field as "" so the first section is not prefixed
    // with the literal string "undefined".
    const appendText = (section) => {
        input.text = (input.text ?? "") + section;
    };
    const appendImages = (images) => {
        input.images = [...(input.images || []), ...images];
    };
    const plainKind = PLAIN_TEXT_SECTION_KINDS.get(result.type);
    if (plainKind) {
        if (result.content) {
            appendText(`\n\n## ${plainKind.heading}: "${filename}"\n${result.content}\n`);
        }
        logger.info(`[FileDetector] ${plainKind.logLabel}: ${filename}`);
        return;
    }
    switch (result.type) {
        case "csv": {
            const filePath = typeof file === "string" ? file : filename;
            let csvSection = `\n\n## CSV Data from "${filename}":\n`;
            if (result.metadata) {
                const metadataText = formatCSVMetadata(result.metadata);
                if (metadataText) {
                    csvSection += metadataText + `\n\n`;
                }
            }
            // Put the actual CSV content BEFORE the tool instructions —
            // buildCSVToolInstructions references "the CSV data shown above" and
            // the trailing position keeps that reference accurate.
            csvSection += result.content;
            csvSection += buildCSVToolInstructions(filePath);
            appendText(csvSection);
            logger.info(`[FileDetector] ✅ CSV: ${filename}`);
            break;
        }
        case "svg":
            appendText(`\n\n## SVG Content from "${filename}":\n\`\`\`xml\n${result.content}\n\`\`\`\n`);
            logger.info(`[FileDetector] ✅ SVG (as text): ${filename}`);
            break;
        case "image":
            appendImages([result.content]);
            logger.info(`[FileDetector] ✅ Image: ${result.mimeType}`);
            break;
        case "pdf":
            input.pdfFiles = [...(input.pdfFiles || []), result.content];
            logger.info(`[FileDetector] ✅ PDF: ${filename}`);
            break;
        case "video":
            if (result.content) {
                appendText(`\n\n## Video File: "${filename}"\n${result.content}\n`);
            }
            if (result.images && result.images.length > 0) {
                appendImages(result.images);
                logger.info(`[FileDetector] Added ${result.images.length} video keyframes as images`);
            }
            logger.info(`[FileDetector] ✅ Video: ${filename}`);
            break;
        case "audio":
            if (result.content) {
                appendText(`\n\n## Audio File: "${filename}"\n${result.content}\n`);
            }
            if (result.images && result.images.length > 0) {
                appendImages(result.images);
                logger.info(`[FileDetector] Added audio cover art as image`);
            }
            logger.info(`[FileDetector] ✅ Audio: ${filename}`);
            break;
        case "text": {
            if (result.content) {
                const langHint = getLanguageHint(result.mimeType, filename);
                const MAX_TEXT_FILE_CHARS = 200_000;
                let fileContent = result.content;
                let truncated = false;
                if (fileContent.length > MAX_TEXT_FILE_CHARS) {
                    // Keep 75% from the head and 25% from the tail of the cap.
                    const headChars = Math.floor(MAX_TEXT_FILE_CHARS * 0.75);
                    const tailChars = Math.floor(MAX_TEXT_FILE_CHARS * 0.25);
                    const omittedChars = fileContent.length - headChars - tailChars;
                    fileContent =
                        fileContent.slice(0, headChars) +
                            `\n\n... [${omittedChars.toLocaleString()} characters omitted — file truncated to fit context window] ...\n\n` +
                            fileContent.slice(-tailChars);
                    truncated = true;
                }
                const textSection = langHint
                    ? `\n\n## File: "${filename}"\n\`\`\`${langHint}\n${fileContent}\n\`\`\`\n`
                    : `\n\n## File: "${filename}"\n${fileContent}\n`;
                appendText(textSection);
                if (truncated) {
                    logger.warn(`[FileDetector] Large text file "${filename}" truncated from ${result.content.length.toLocaleString()} to ${MAX_TEXT_FILE_CHARS.toLocaleString()} chars`);
                }
            }
            logger.info(`[FileDetector] ✅ Text: ${filename}`);
            break;
        }
        default:
            // Unrecognised result types are ignored.
            break;
    }
}
/**
 * Process the unified files array with auto-detection.
 * Handles lazy file registration, full processing, and preview injection.
 *
 * Exported so providers that bypass BaseProvider.generate() (e.g.
 * GoogleVertex's native @google/genai path) can still preprocess
 * `input.files` — without this, mimetype-hint and text-file inputs
 * would silently never reach the model on those paths.
 *
 * Side effects: appends to `options.input.text` / `images` / `pdfFiles`, and,
 * when at least one file was included, appends file-handling guidance to
 * `options.systemPrompt` (once — repeated calls do not stack it).
 * Per-file failures are logged and skipped; they do not reject the promise.
 *
 * @param {object} options - Request options (mutated).
 * @param {number} maxSize - Caller's size limit in bytes; the generic path uses at least 100 MiB.
 * @param {string} provider
 * @returns {Promise<void>}
 */
export async function processUnifiedFilesArray(options, maxSize, provider) {
    options.input ??= {};
    if (!options.input.files || options.input.files.length === 0) {
        return;
    }
    const totalFiles = options.input.files.length;
    const files = options.input.files;
    return withSpan({
        name: "neurolink.file.process_all",
        tracer: tracers.file,
        attributes: {
            [ATTR.FILE_TOTAL_COUNT]: totalFiles,
            [ATTR.NL_PROVIDER]: provider,
        },
    }, async (span) => {
        logger.info(`[NEUROLINK] Processing ${totalFiles} file(s) with auto-detection`);
        // `options.input` was set by the `??= {}` guard above; re-assert inside
        // the closure and make sure `text` is a string before appending to it.
        options.input ??= {};
        const inp2 = options.input;
        inp2.text = inp2.text || "";
        let includedCount = 0;
        const fileRegistry = options.fileRegistry;
        for (let fileIdx = 0; fileIdx < files.length; fileIdx++) {
            const file = files[fileIdx];
            const filename = extractFilename(file, fileIdx);
            try {
                // ─── Lazy file registration path ──────────────────────────────
                const fileSize = fileRegistry ? getFileSize(file) : 0;
                if (fileRegistry && fileSize > SIZE_TIER_THRESHOLDS.TINY_MAX) {
                    const registered = await tryRegisterFileReference(file, fileSize, fileRegistry, fileIdx);
                    if (registered) {
                        logger.info(`[NEUROLINK] File lazily registered: ${filename} (${fileSize} bytes) — deferred processing`);
                        includedCount++;
                        continue;
                    }
                }
                // ─── Full processing path (current behavior) ──────────────────
                const genericFileMaxSize = Math.max(maxSize, 100 * 1024 * 1024);
                const hasMetadata = isFileWithMetadata(file);
                const rawFileInput = hasMetadata ? file.buffer : file;
                // Forward the caller's mimetype hint (Slack/Curator-style
                // extension-less buffers) so the eager path classifies correctly
                // for tiny files — the lazy registry path has its own hint wiring.
                const fileMimetypeHint = hasMetadata ? file.mimetype : undefined;
                const result = await FileDetector.detectAndProcess(rawFileInput, {
                    maxSize: genericFileMaxSize,
                    allowedTypes: [
                        "csv",
                        "image",
                        "pdf",
                        "svg",
                        "video",
                        "audio",
                        "archive",
                        "xlsx",
                        "docx",
                        "pptx",
                        "text",
                        "unknown",
                    ],
                    csvOptions: options.csvOptions,
                    provider: provider,
                    mimetypeHint: fileMimetypeHint,
                });
                appendDetectedFileResult(result, file, options);
                includedCount++;
                // Log what content type was added to the message
                const contentType = result.type === "image" ? "image" : "text";
                logger.info(`[NEUROLINK] File added to message: ${filename} as ${contentType} (type: ${result.type})`);
            }
            catch (error) {
                const errMsg = error instanceof Error ? error.message : String(error);
                logger.error(`[NEUROLINK] File skipped/failed: ${filename} — reason: ${errMsg}`);
            }
        }
        span.setAttribute(ATTR.FILE_INCLUDED_COUNT, includedCount);
        // After processing all files, inject previews for any lazily-registered files
        if (fileRegistry && fileRegistry.size > 0) {
            const previewText = await fileRegistry.generatePromptPreview();
            if (previewText) {
                inp2.text = (inp2.text || "") + previewText;
                logger.info(`[FileDetector] Injected previews for ${fileRegistry.size} lazily-registered file(s)`);
            }
            for (const ref of fileRegistry.list()) {
                if (ref.extractedImages && ref.extractedImages.length > 0) {
                    inp2.images = [...(inp2.images || []), ...ref.extractedImages];
                    logger.info(`[FileDetector] Injected ${ref.extractedImages.length} extracted images from "${ref.filename}"`);
                }
            }
        }
        logger.info(`[NEUROLINK] File processing complete: ${includedCount}/${totalFiles} files included in message`);
        // Augment options.systemPrompt with file-handling guidance so providers
        // that bypass the message-builder's system message and read
        // `options.systemPrompt` directly (e.g. GoogleVertex's native @google/genai
        // path uses `config.systemInstruction = options.systemPrompt`) still see
        // the "treat inlined CSV/PDF as the actual file" guidance. Without this,
        // Vertex Gemini 2.5 reliably responds with "no files attached" even
        // though the CSV content is fully embedded in the user prompt.
        if (includedCount > 0) {
            const guidanceHeading = "IMPORTANT FILE HANDLING INSTRUCTIONS:";
            const existingSystem = (options.systemPrompt || "").trim();
            // Skip when the guidance is already present so a second pass over
            // the same options object does not stack duplicate instructions.
            if (!existingSystem.includes(guidanceHeading)) {
                const filePromptAugmentation = [
                    "",
                    "",
                    guidanceHeading,
                    `- The full content of the user's local file(s) is INLINED in this message under "## CSV Data from ..." / "## PDF Data from ..." / "## File: ..." headings — it is the actual file the user is asking about.`,
                    `- TREAT THE INLINED CONTENT AS IF IT WERE AN ATTACHMENT. Do NOT respond with "no files attached" or ask the user to re-upload — the data is already here.`,
                    `- DO NOT use GitHub tools (get_file_contents, search_code, etc.) for local files - they only work for remote repository files.`,
                    `- Analyze the inlined file content directly without attempting to fetch or read files using tools.`,
                ].join("\n");
                options.systemPrompt = existingSystem
                    ? `${existingSystem}${filePromptAugmentation}`
                    : filePromptAugmentation.trim();
            }
        }
    });
}
/**
 * Process explicit CSV files array and append to options.input.text.
 *
 * Each entry of `options.input.csvFiles` is parsed with FileDetector
 * (CSV only). On success the data section plus CSV tool instructions is
 * appended; on failure an inline "CSV Data Error" section is appended instead
 * and processing continues with the next file.
 *
 * @param {object} options - Request options (mutated).
 * @returns {Promise<void>}
 */
async function processExplicitCsvFiles(options) {
    options.input ??= {};
    if (!options.input.csvFiles || options.input.csvFiles.length === 0) {
        return;
    }
    logger.info(`[CSV] Processing ${options.input.csvFiles.length} explicit CSV file(s)`);
    options.input.text = options.input.text || "";
    for (let i = 0; i < options.input.csvFiles.length; i++) {
        const csvFile = options.input.csvFiles[i];
        try {
            const result = await FileDetector.detectAndProcess(csvFile, {
                allowedTypes: ["csv"],
                csvOptions: options.csvOptions,
            });
            const filename = extractFilename(csvFile, i);
            const filePath = typeof csvFile === "string" ? csvFile : filename;
            let csvSection = `\n\n## CSV Data from "${filename}":\n`;
            if (result.metadata) {
                const metadataText = formatCSVMetadata(result.metadata);
                if (metadataText) {
                    csvSection += metadataText + `\n\n`;
                }
            }
            // Put the actual CSV content BEFORE the tool instructions —
            // buildCSVToolInstructions references "the CSV data shown above"
            // and the trailing position keeps that reference accurate.
            csvSection += result.content;
            csvSection += buildCSVToolInstructions(filePath);
            options.input.text += csvSection;
            logger.info(`[CSV] ✅ Processed: ${filename}`);
        }
        catch (error) {
            logger.error(`[CSV] ❌ Failed:`, error);
            const filename = extractFilename(csvFile, i);
            options.input.text += `\n\n## CSV Data Error: Failed to process "${filename}"`;
            options.input.text += `\nReason: ${error instanceof Error ? error.message : "Unknown error"}`;
        }
    }
}
/**
 * Enforce post-processing budget on accumulated text content and log token usage.
 *
 * The text budget is `floor(availableInputTokens * FILE_READ_BUDGET_PERCENT)`.
 * When the estimated token count of `options.input.text` exceeds it, the text
 * is cut to a 75% head / 25% tail split with a truncation notice in between.
 * A content breakdown (text tokens, image count at ~1500 tokens each,
 * utilisation) is logged afterwards. Does nothing when there is no text.
 *
 * @param {object} options - Request options; `input.text` may be replaced.
 * @param {string} provider
 * @param {string} model
 */
function enforcePostProcessingBudget(options, provider, model) {
    options.input ??= {};
    if (!options.input.text) {
        return;
    }
    const availableTokens = getAvailableInputTokens(provider, model);
    const textTokenBudget = Math.floor(availableTokens * FILE_READ_BUDGET_PERCENT);
    const actualTextTokens = estimateTokens(options.input.text, provider);
    if (textTokenBudget > 0 && actualTextTokens > textTokenBudget) {
        const fullText = options.input.text;
        // Character cap: at most 4 chars per budgeted token, but never more
        // than the budget's share of this text's measured token density.
        // Without the proportional bound, text that is denser than 4
        // chars/token can exceed the token budget while staying under
        // `budget * 4` characters, and would be left untruncated.
        const proportionalChars = Math.floor((fullText.length * textTokenBudget) / actualTextTokens);
        const maxChars = Math.min(textTokenBudget * 4, proportionalChars);
        if (fullText.length > maxChars) {
            const headChars = Math.floor(maxChars * 0.75);
            const tailChars = Math.floor(maxChars * 0.25);
            const head = fullText.slice(0, headChars);
            // slice(-0) would return the whole string, so guard the zero case.
            const tail = tailChars > 0 ? fullText.slice(-tailChars) : "";
            const truncatedTokens = actualTextTokens - textTokenBudget;
            options.input.text =
                head +
                    `\n\n[... ${truncatedTokens.toLocaleString()} tokens of file content truncated to fit context window ...]\n\n` +
                    tail;
            logger.warn(`[FileDetector] Post-processing budget enforcement: truncated ~${truncatedTokens.toLocaleString()} tokens of file content to fit ${textTokenBudget.toLocaleString()} token budget`);
        }
    }
    // Token usage breakdown logging
    const textTokens = estimateTokens(options.input.text, provider);
    const imageCount = (options.input.images?.length ?? 0) +
        (options.input.content?.filter((c) => c.type === "image").length ?? 0);
    const imageTokens = imageCount * 1500;
    const totalContentTokens = textTokens + imageTokens;
    // Same provider/model as above, so reuse the value instead of looking it up again.
    const contextWindow = availableTokens;
    logger.info(`[TokenUsage] Content breakdown: text=${textTokens.toLocaleString()} tokens, ` +
        `images=${imageCount} (~${imageTokens.toLocaleString()} tokens), ` +
        `total=${totalContentTokens.toLocaleString()} tokens, ` +
        `budget=${contextWindow.toLocaleString()} tokens, ` +
        `utilization=${contextWindow > 0 ? ((totalContentTokens / contextWindow) * 100).toFixed(1) : "N/A"}%`);
}
*/ async function processExplicitPdfFiles(options, maxSize, provider) { options.input ??= {}; const pdfFiles = []; if (!options.input.pdfFiles || options.input.pdfFiles.length === 0) { return pdfFiles; } logger.info(`[PDF] Processing ${options.input.pdfFiles.length} explicit PDF file(s) for ${provider}`); for (let i = 0; i < options.input.pdfFiles.length; i++) { const pdfFile = options.input.pdfFiles[i]; const filename = extractFilename(pdfFile, i); try { const result = await FileDetector.detectAndProcess(pdfFile, { maxSize, allowedTypes: ["pdf"], provider: provider, }); if (Buffer.isBuffer(result.content)) { pdfFiles.push({ buffer: result.content, filename, pageCount: result.metadata?.estimatedPages ?? null, }); logger.info(`[PDF] ✅ Queued for multimodal: ${filename} (${result.metadata?.estimatedPages ?? "unknown"} pages)`); } } catch (error) { logger.error(`[PDF] ❌ Failed to process ${filename}:`, error); throw error; } } return pdfFiles; } /** * Build the enhanced system prompt for multimodal messages, including * conversation instructions, structured output instructions, and file handling guidance. */ function buildMultimodalSystemPrompt(options, hasPDFFiles) { options.input ??= {}; let systemPrompt = options.systemPrompt?.trim() || ""; const hasConversationHistory = options.conversationHistory && options.conversationHistory.length > 0; if (hasConversationHistory) { systemPrompt = `${systemPrompt.trim()}${CONVERSATION_INSTRUCTIONS}`; } if (shouldUseStructuredOutput(options)) { systemPrompt = `${systemPrompt.trim()}${STRUCTURED_OUTPUT_INSTRUCTIONS}`; } const inp = options.input; const hasCSVFiles = (inp.csvFiles && inp.csvFiles.length > 0) || (inp.files && inp.files.some((f) => typeof f === "string" ? 
f.toLowerCase().endsWith(".csv") : false)); if (hasCSVFiles || hasPDFFiles) { const fileTypes = []; if (hasPDFFiles) { fileTypes.push("PDFs"); } if (hasCSVFiles) { fileTypes.push("CSVs"); } systemPrompt += `\n\nIMPORTANT FILE HANDLING INSTRUCTIONS: - The full content of the user's local ${fileTypes.join(", ")} (and any images) is INLINED in this message under the "## CSV Data from ..." / "## PDF Data from ..." headings — it is the actual file the user is asking about. - TREAT THE INLINED CONTENT AS IF IT WERE AN ATTACHMENT. Do NOT respond with "no files attached" or ask the user to re-upload — the data is already here. - DO NOT use GitHub tools (get_file_contents, search_code, etc.) for local files - they only work for remote repository files - Analyze the provided file content directly without attempting to fetch or read files using tools - GitHub MCP tools are ONLY for remote repository operations, not local filesystem access - Use the file content shown in this message for your analysis`; } return systemPrompt; } /** * Build multimodal message array with image support * Detects when images are present and routes through provider adapter */ export async function buildMultimodalMessagesArray(options, provider, model) { // Media-only callers (avatar / music / video) may omit `input` entirely. // Normalise to an empty object so all sub-functions can access input.* // without defensive null checks on every field access. if (!options.input) { options.input = {}; } // After normalisation `input` is guaranteed non-undefined. Capture it in a // local const so TypeScript sees the definite (non-optional) type in the // rest of this function, avoiding 60+ "possibly undefined" errors. const inp = options.input; // Compute provider-specific max PDF size once for consistent validation const pdfConfig = PDFProcessor.getProviderConfig(provider); const maxSize = pdfConfig ? 
pdfConfig.maxSizeMB * 1024 * 1024 : 10 * 1024 * 1024; // Aggregate file budget enforcement enforceFileBudget(options, provider, model); // Process unified files array (auto-detect) await processUnifiedFilesArray(options, maxSize, provider); // Process explicit CSV files array await processExplicitCsvFiles(options); // Post-processing budget enforcement and token usage logging enforcePostProcessingBudget(options, provider, model); // Process explicit PDF files const pdfFiles = await processExplicitPdfFiles(options, maxSize, provider); // Check if this is a multimodal request const hasImages = (inp.images && inp.images.length > 0) || (inp.content && inp.content.some((c) => c.type === "image")); const hasPDFs = pdfFiles.length > 0; // If no images or PDFs, use standard message building and convert to MultimodalChatMessage[] if (!hasImages && !hasPDFs) { if (inp.csvFiles) { inp.csvFiles = []; } if (inp.pdfFiles) { inp.pdfFiles = []; } if (inp.files) { inp.files = []; } const standardMessages = await buildMessagesArray(options); return standardMessages.map((msg) => { const msgProviderOptions = msg .providerOptions; return { role: msg.role, content: msg.content, ...(msgProviderOptions && { providerOptions: msgProviderOptions }), }; }); } // Validate provider supports vision if (!ProviderImageAdapter.supportsVision(provider, model)) { throw new Error(`Provider ${provider} with model ${model} does not support vision processing. 
` + `Supported providers: ${ProviderImageAdapter.getVisionProviders().join(", ")}`); } const messages = []; // Build enhanced system prompt const systemPrompt = buildMultimodalSystemPrompt(options, pdfFiles.length > 0); if (systemPrompt.trim()) { messages.push({ role: "system", content: systemPrompt.trim(), providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } }, }, }); } // Add conversation history if available const hasConversationHistory = options.conversationHistory && options.conversationHistory.length > 0; if (hasConversationHistory && options.conversationHistory) { for (const msg of options.conversationHistory) { // Filter out tool_call and tool_result roles — only user/assistant/system are valid for AI providers if (msg.role === "user" || msg.role === "assistant" || msg.role === "system") { const providerOptions = msg.providerOptions; // Sanitize assistant array content: strip tool_use/tool_result blocks // that providers cannot handle. If an assistant message ends up empty // after stripping, skip it to avoid sending content: "" to Claude. // Only assistant messages need this — user messages may contain valid // image/file blocks that must pass through unchanged. let sanitizedContent = msg.content; if (msg.role === "assistant" && Array.isArray(msg.content)) { const textParts = msg.content.filter((item) => !!item && typeof item === "object" && item.type === "text" && typeof item.text === "string"); if (textParts.length === 0) { // All content was tool_use/tool_result/non-text — skip message continue; } // Check if any retained text part carries providerOptions // (e.g. Anthropic cache_control). If so, preserve them as // array content to avoid losing per-block metadata. const hasItemProviderOptions = textParts.some((item) => !!item.providerOptions); if (hasItemProviderOptions) { sanitizedContent = textParts; } else { sanitizedContent = textParts.length === 1 ? 
textParts[0].text : textParts .map((p) => p.text) .join(" "); } } // Skip empty string content to avoid Claude API rejection if (sanitizedContent === "") { continue; } messages.push({ role: msg.role, content: sanitizedContent, ...(providerOptions && { providerOptions }), }); } } } // Handle multimodal content try { let userContent; if (inp.content && inp.content.length > 0) { userContent = await convertContentToProviderFormat(inp.content, provider, model); } else if ((inp.images && inp.images.length > 0) || pdfFiles.length > 0) { userContent = await convertMultimodalToProviderFormat(inp.text ?? "", inp.images || [], pdfFiles, provider, model); } else { userContent = inp.text; } if (typeof userContent === "string") { messages.push({ role: "user", content: userContent, }); } else { messages.push({ role: "user", content: userContent, }); } const reqId = options.context ?.requestId; logMessageComposition(messages, reqId); return messages; } catch (error) { MultimodalLogger.logError("MULTIMODAL_BUILD", error, { provider, model, hasImages, imageCount: inp.images?.length || 0, }); throw error; } } /** * Convert advanced content format to provider-specific format */ async function convertContentToProviderFormat(content, provider, _model) { const textContent = content.find((c) => c.type === "text"); const imageContent = content.filter((c) => c.type === "image"); const pdfContent = content.filter((c) => c.type