UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.

994 lines 79.1 kB
import { context, SpanKind, SpanStatusCode, trace } from "@opentelemetry/api"; import { generateText } from "ai"; import { directAgentTools } from "../agent/directTools.js"; import { IMAGE_GENERATION_MODELS } from "../core/constants.js"; import { MiddlewareFactory } from "../middleware/factory.js"; import { ATTR, tracers } from "../telemetry/index.js"; import { isAbortError } from "../utils/errorHandling.js"; import { hasLifecycleErrorFired, markLifecycleErrorFired, } from "../utils/lifecycleCallbacks.js"; import { resolveLifecycleTimeoutMs } from "../utils/lifecycleTimeout.js"; import { logger } from "../utils/logger.js"; import { withTimeoutFn } from "../utils/async/withTimeout.js"; import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js"; import { shouldDisableBuiltinTools } from "../utils/toolUtils.js"; import { getKeyCount, getKeysAsString } from "../utils/transformationUtils.js"; import { TTSProcessor } from "../utils/ttsProcessor.js"; import { executeVideoAnalysis, hasVideoFrames, } from "../utils/videoAnalysisProcessor.js"; import { GenerationHandler } from "./modules/GenerationHandler.js"; // Import modules for composition import { MessageBuilder } from "./modules/MessageBuilder.js"; import { StreamHandler } from "./modules/StreamHandler.js"; import { TelemetryHandler } from "./modules/TelemetryHandler.js"; import { ToolsManager } from "./modules/ToolsManager.js"; import { Utilities } from "./modules/Utilities.js"; /** * Abstract base class for all AI providers * Tools are integrated as first-class citizens - always available by default */ export class BaseProvider { // Not `readonly` because providers that auto-discover the model from a // /v1/models endpoint (lm-studio, llamacpp) need to update modelName after // construction so handlers (TelemetryHandler, MessageBuilder) cache the // resolved name. All other providers treat this as effectively readonly. 
modelName; providerName; defaultTimeout = 30000; // 30 seconds middlewareOptions; // TODO: Implement global level middlewares that can be used // Tools are conditionally included based on centralized configuration directTools = shouldDisableBuiltinTools() ? {} : directAgentTools; mcpTools; // MCP tools loaded dynamically when available customTools; // Custom tools from registerTool() toolExecutor; // Tool executor from setupToolExecutor sessionId; userId; neurolink; // Reference to actual NeuroLink instance for MCP tools /** @internal Trace context propagated from NeuroLink SDK for span hierarchy */ _traceContext = null; setTraceContext(ctx) { this._traceContext = ctx; } // Composition modules - Single Responsibility Principle // Handlers below are not `readonly` so that providers which auto-discover // their model after construction (lm-studio, llamacpp) can rebuild them // via `refreshHandlersForModel(...)` and propagate the resolved name into // pricing / telemetry / span attributes. All other providers leave these // alone. 
messageBuilder; streamHandler; generationHandler; telemetryHandler; utilities; toolsManager; constructor(modelName, providerName, neurolink, middleware) { this.modelName = modelName || this.getDefaultModel(); this.providerName = providerName || this.getProviderName(); this.neurolink = neurolink; this.middlewareOptions = middleware; // Initialize composition modules this.messageBuilder = new MessageBuilder(this.providerName, this.modelName); this.streamHandler = new StreamHandler(this.providerName, this.modelName); this.telemetryHandler = new TelemetryHandler(this.providerName, this.modelName, this.neurolink); this.generationHandler = new GenerationHandler(this.providerName, this.modelName, () => this.supportsTools(), (options, type) => this.telemetryHandler.getTelemetryConfig(options, type), (toolCalls, toolResults, options, timestamp) => this.handleToolExecutionStorage(toolCalls, toolResults, options, timestamp), () => this.neurolink?.getEventEmitter()); this.utilities = new Utilities(this.providerName, this.modelName, this.defaultTimeout, this.middlewareOptions); this.toolsManager = new ToolsManager(this.providerName, this.directTools, this.neurolink, { isZodSchema: (schema) => this.isZodSchema(schema), convertToolResult: (result) => this.convertToolResult(result), createPermissiveZodSchema: () => this.createPermissiveZodSchema(), fixSchemaForOpenAIStrictMode: (schema) => this.fixSchemaForOpenAIStrictMode(schema), }); } /** * Update modelName and rebuild composition handlers with the new value. * * Auto-discovery providers (lm-studio, llamacpp) call this once they have * resolved the loaded model from `/v1/models`. Without this, handlers * (TelemetryHandler, MessageBuilder, ...) keep the pre-discovery name and * pricing / span / log metadata reports the stale value. 
*/ refreshHandlersForModel(model) { this.modelName = model; trace .getSpan(context.active()) ?.setAttribute(ATTR.GEN_AI_MODEL, this.modelName); this.messageBuilder = new MessageBuilder(this.providerName, this.modelName); this.streamHandler = new StreamHandler(this.providerName, this.modelName); this.telemetryHandler = new TelemetryHandler(this.providerName, this.modelName, this.neurolink); this.generationHandler = new GenerationHandler(this.providerName, this.modelName, () => this.supportsTools(), (options, type) => this.telemetryHandler.getTelemetryConfig(options, type), (toolCalls, toolResults, options, timestamp) => this.handleToolExecutionStorage(toolCalls, toolResults, options, timestamp), () => this.neurolink?.getEventEmitter()); this.utilities = new Utilities(this.providerName, this.modelName, this.defaultTimeout, this.middlewareOptions); } /** * Check if this provider supports tool/function calling * Override in subclasses to disable tools for specific providers or models * @returns true by default, providers can override to return false */ supportsTools() { return true; } // =================== // PUBLIC API METHODS // =================== /** * Primary streaming method - implements AIProvider interface * When tools are involved, falls back to generate() with synthetic streaming */ async stream(optionsOrPrompt, analysisSchema) { let options = this.normalizeStreamOptions(optionsOrPrompt); logger.info(`Starting stream`, { provider: this.providerName, hasTools: !options.disableTools && this.supportsTools(), disableTools: !!options.disableTools, supportsTools: this.supportsTools(), inputLength: options.input?.text?.length || 0, maxTokens: options.maxTokens, temperature: options.temperature, timestamp: Date.now(), }); // ===== EARLY MULTIMODAL DETECTION ===== const hasFileInput = !!options.input?.files?.length || !!options.input?.videoFiles?.length; if (hasFileInput) { // ===== VIDEO ANALYSIS DETECTION ===== // Check if video frames are present and handle with 
fake streaming const messages = await this.buildMessagesForStream(options); if (hasVideoFrames(messages)) { logger.info(`Video frames detected in stream, using fake streaming for video analysis`, { provider: this.providerName, model: this.modelName, }); // Note: executeFakeStreaming() owns its own catch that fires the // consumer-supplied onError before re-throwing through // handleProviderError(), so we do not need to wrap again here — // doing so would route the error through handleProviderError() // twice (and risk a double-fire onError without the shared // lifecycle-fired WeakSet mark). const fakeResult = await this.executeFakeStreaming(options, analysisSchema); return this.wrapStreamWithLifecycleCallbacks(fakeResult, options); } } // CRITICAL: Image generation models don't support real streaming // Force fake streaming for image models to ensure image output is yielded. // Skip this path when the caller explicitly requests non-image output (e.g. // JSON analysis) so dual-mode models like gemini-3.1-flash-image-preview // can still perform text/structured generation. const isImageModel = IMAGE_GENERATION_MODELS.some((m) => this.modelName.includes(m)); const requestsNonImageOutput = options.output?.format === "json" || options.output?.format === "structured" || options.output?.format === "text"; if (isImageModel && !requestsNonImageOutput) { logger.info(`Image model detected, forcing fake streaming`, { provider: this.providerName, model: this.modelName, reason: "Image generation requires fake streaming to yield image output", }); // Skip real streaming, go directly to fake streaming. // executeFakeStreaming() owns its own catch + lifecycle fire, so // wrapping again here would double-route through handleProviderError(). const fakeResult = await this.executeFakeStreaming(options, analysisSchema); return this.wrapStreamWithLifecycleCallbacks(fakeResult, options); } // Central tool merge: Pre-merge base tools (MCP/built-in) with user-provided // tools (e.g. 
RAG tools) into options.tools. This way, every provider's // executeStream() can simply use options.tools (or getAllTools() + options.tools) // and get the complete tool set without needing per-provider merge logic. if (!options.disableTools && this.supportsTools()) { const mergedTools = await this.getToolsForStream(options); options = { ...options, tools: mergedTools }; } else { options = { ...options, tools: {} }; } // CRITICAL FIX: Always prefer real streaming over fake streaming // Try real streaming first, use fake streaming only as fallback try { logger.debug(`Attempting real streaming`, { provider: this.providerName, timestamp: Date.now(), }); const realStreamResult = await this.executeStream(options, analysisSchema); logger.info(`Real streaming succeeded`, { provider: this.providerName, timestamp: Date.now(), }); // Wire lifecycle callbacks (onChunk/onFinish/onError) on the user- // facing StreamResult.stream. The AI-SDK lifecycle middleware only // sees AI-SDK-internal chunks via streamText/wrapLanguageModel, so // providers with custom HTTP streaming (Ollama, llama.cpp's /api, // anything that doesn't go through streamText) bypass it. Wrapping // here makes the callbacks fire for every provider, regardless of // streaming implementation. return this.wrapStreamWithLifecycleCallbacks(realStreamResult, options); } catch (realStreamError) { // Don't retry on terminal/abort errors — only fall back for // "real streaming with tools is unsupported" style failures. const errMsg = realStreamError instanceof Error ? realStreamError.message : String(realStreamError); const errName = realStreamError instanceof Error ? 
realStreamError.name : ""; if (errName === "AbortError" || errMsg.includes("abort") || errMsg.includes("timeout") || errMsg.includes("401") || errMsg.includes("403") || errMsg.includes("quota") || errMsg.includes("rate limit") || errMsg.includes("authentication")) { await this.fireLifecycleErrorCallback(options, realStreamError); throw this.handleProviderError(realStreamError); } logger.warn(`Real streaming failed for ${this.providerName}, falling back to fake streaming:`, { error: errMsg, timestamp: Date.now(), }); // Fallback to fake streaming only if real streaming fails AND tools // are enabled. executeFakeStreaming() owns its own catch + lifecycle // fire, so a fake-streaming failure here surfaces through that path // without needing an outer wrap (which would double-route through // handleProviderError()). if (!options.disableTools && this.supportsTools()) { const fakeResult = await this.executeFakeStreaming(options, analysisSchema); return this.wrapStreamWithLifecycleCallbacks(fakeResult, options); } else { // If real streaming failed and no tools are enabled, fire onError // before re-throwing so consumer-supplied callbacks see the failure. await this.fireLifecycleErrorCallback(options, realStreamError); // If real streaming failed and no tools are enabled, re-throw the original error logger.error(`Real streaming failed for ${this.providerName}:`, realStreamError); throw this.handleProviderError(realStreamError); } } } /** * Wrap a StreamResult with consumer-facing lifecycle callbacks. * * `options.onChunk`, `options.onFinish`, `options.onError` are translated * by NeuroLink.applyStreamLifecycleMiddleware() into * `options.middleware.middlewareConfig.lifecycle.config`. The AI SDK's * lifecycle middleware only sees these via the wrapped LanguageModel — * which is bypassed by providers that stream via raw HTTP fetch (Ollama * over /api/chat, custom OpenAI-compatible servers, etc). 
Wrapping the * user-facing stream here ensures the callbacks fire regardless of the * underlying transport. */ wrapStreamWithLifecycleCallbacks(result, options) { const lifecycle = options ?.middleware?.middlewareConfig?.lifecycle?.config; if (!lifecycle?.onChunk && !lifecycle?.onFinish && !lifecycle?.onError) { return result; } const { onChunk, onFinish, onError } = lifecycle; const startTime = Date.now(); const originalStream = result.stream; // Lifecycle callbacks are awaited with a bounded deadline so callers // observe ordering guarantees (onChunk/onFinish/onError have all // settled by the time `for await` returns / throws). The previous // fire-and-forget pattern left async work running past stream close, // creating races during cleanup. The deadline is configurable via // `lifecycle.timeoutMs` (per-call) or `NEUROLINK_LIFECYCLE_TIMEOUT_MS` // (env / CLI surface) — see `resolveLifecycleTimeoutMs`. const timeoutMs = resolveLifecycleTimeoutMs(lifecycle); const safeFire = async (fn, label) => { try { await withTimeoutFn(async () => { const ret = fn(); if (ret && typeof ret.then === "function") { await ret; } }, timeoutMs, `[lifecycle] ${label} callback exceeded ${timeoutMs}ms`); } catch (e) { logger.warn(`[lifecycle] ${label} callback error:`, e); } }; const wrappedStream = (async function* () { let accumulated = ""; let seq = 0; try { for await (const chunk of originalStream) { const textPart = chunk && typeof chunk === "object" && "content" in chunk && typeof chunk.content === "string" ? chunk.content : ""; // Only fire onChunk for actual text deltas. Non-text chunks // (image, tts_audio) would otherwise produce empty text-delta // events that consumers must filter out themselves. 
if (onChunk && textPart) { const currentSeq = seq++; await safeFire(() => onChunk({ type: "text-delta", textDelta: textPart, sequenceNumber: currentSeq, }), "onChunk"); } if (textPart) { accumulated += textPart; } yield chunk; } if (onFinish) { await safeFire(() => onFinish({ text: accumulated, duration: Date.now() - startTime, }), "onFinish"); } } catch (error) { const err = error instanceof Error ? error : new Error(String(error)); if (onError && !hasLifecycleErrorFired(err)) { // Mark before firing so a higher layer that also routes through // fireLifecycleErrorCallback (or its own lifecycle wrapper) with // the same error instance won't double-fire onError. Mirrors the // pattern in fireLifecycleErrorCallback below. markLifecycleErrorFired(err); await safeFire(() => onError({ error: err, duration: Date.now() - startTime, recoverable: false, }), "onError"); } throw err; } })(); return { ...result, stream: wrappedStream }; } /** * Fire the consumer-supplied onError callback before throwing. Used in * error branches inside stream() that re-throw without emitting any * stream chunks (which would otherwise hide the failure from a caller * that supplied `onError`). */ async fireLifecycleErrorCallback(options, error) { const err = error instanceof Error ? error : new Error(String(error)); // The AI-SDK lifecycle middleware stamps errors it has already // surfaced (Symbol.for("neurolink.onErrorFired"); see // utils/lifecycleCallbacks.ts). Skip here so consumers don't receive // duplicate onError events for the same failure. if (hasLifecycleErrorFired(err)) { return; } const lifecycle = options ?.middleware?.middlewareConfig?.lifecycle?.config; const onError = lifecycle?.onError; if (!onError) { return; } // Set the marker before invoking so a sync re-entry (or a concurrent // dispatch path) can't double-fire onError for the same error object. 
markLifecycleErrorFired(err); // Fire the consumer's onError with a bounded deadline AND await its // completion — callers can now `await fireLifecycleErrorCallback(...)` // to guarantee the consumer's async onError settles before the // surrounding stream() / executeFakeStreaming() rethrows. Deadline is // configurable via `lifecycle.timeoutMs` or the // `NEUROLINK_LIFECYCLE_TIMEOUT_MS` env var. const timeoutMs = resolveLifecycleTimeoutMs(lifecycle); try { await withTimeoutFn(async () => { // Capturing `onError` into a const above means TypeScript sees the // narrowing past the early-return, so no non-null assertion needed // here — and the callback identity is stable across the timeout // boundary even if the caller mutates `lifecycle.onError` mid-call. const ret = onError({ error: err, duration: 0, recoverable: false, }); if (ret && typeof ret.then === "function") { await ret; } }, timeoutMs, `[lifecycle] onError callback exceeded ${timeoutMs}ms`); } catch (e) { logger.warn("[lifecycle] onError callback error:", e); } } /** * Execute fake streaming - extracted method for reusability */ async executeFakeStreaming(options, analysisSchema) { try { logger.info(`Starting fake streaming with tools`, { provider: this.providerName, supportsTools: this.supportsTools(), timestamp: Date.now(), }); // Convert stream options to text generation options const textOptions = { prompt: options.input?.text || "", input: options.input, systemPrompt: options.systemPrompt, temperature: options.temperature, maxTokens: options.maxTokens, tools: options.tools, // 🔧 FIX: Pass user-provided tools (including RAG tools) to generation pipeline disableTools: !!options.disableTools, maxSteps: options.maxSteps || 5, provider: options.provider, model: options.model, region: options.region, // Pass region for Vertex AI // 🔧 FIX: Include analytics and evaluation options from stream options enableAnalytics: options.enableAnalytics, enableEvaluation: options.enableEvaluation, evaluationDomain: 
options.evaluationDomain, toolUsageContext: options.toolUsageContext, context: options.context, csvOptions: options.csvOptions, // Forward abort, tool filtering, and timeout options to prevent // silent bypass when falling back from real streaming to fake streaming abortSignal: options.abortSignal, toolFilter: options.toolFilter, excludeTools: options.excludeTools, skipToolPromptInjection: options.skipToolPromptInjection, timeout: options.timeout, stt: options.stt, // Forward TTS options too — without this, the fake-streaming fallback // path silently drops `tts` and the resulting StreamResult never // produces a `tts_audio` chunk even when synthesis was requested. tts: options.tts, }; logger.debug(`Calling generate for fake streaming`, { provider: this.providerName, maxSteps: textOptions.maxSteps, disableTools: textOptions.disableTools, timestamp: Date.now(), }); const result = await this.generate(textOptions, analysisSchema); logger.info(`Generate completed for fake streaming`, { provider: this.providerName, hasContent: !!result?.content, contentLength: result?.content?.length || 0, toolsUsed: result?.toolsUsed?.length || 0, hasImageOutput: !!result?.imageOutput, timestamp: Date.now(), }); // Create a synthetic stream from the generate result that simulates progressive delivery return { stream: (async function* () { if (result?.content) { // Split content into words for more natural streaming const words = result.content.split(/(\s+)/); // Keep whitespace let buffer = ""; for (let i = 0; i < words.length; i++) { buffer += words[i]; // Yield chunks of roughly 5-10 words or at punctuation const shouldYield = i === words.length - 1 || // Last word buffer.length > 50 || // Buffer getting long /[.!?;,]\s*$/.test(buffer); // End of sentence/clause if (shouldYield && buffer.trim()) { yield { content: buffer }; buffer = ""; // Small delay to simulate streaming (1-10ms) await new Promise((resolve) => { setTimeout(resolve, Math.random() * 9 + 1); }); } } // Yield all 
remaining content if (buffer.trim()) { yield { content: buffer }; } } // 🔧 CRITICAL FIX: Yield image output if present if (result?.imageOutput) { yield { type: "image", imageOutput: result.imageOutput, }; } // Yield synthesized audio so callers using stream() with tts.enabled // still receive a tts_audio chunk on the fake-streaming fallback // path (matches the discriminator used by the real streaming path). if (result?.audio) { yield { type: "tts_audio", audio: { data: result.audio.buffer, format: result.audio.format, index: 0, isFinal: true, cumulativeSize: result.audio.size, voice: result.audio.voice, sampleRate: result.audio.sampleRate, }, }; } })(), usage: result?.usage, provider: result?.provider, model: result?.model, toolCalls: result?.toolCalls?.map((call) => ({ toolName: call.toolName, parameters: call.args, id: call.toolCallId, })), toolResults: result?.toolResults ? result.toolResults.map((tr) => ({ toolName: tr.toolName || "unknown", status: (tr.status === "error" ? "failure" : "success"), result: tr.output ?? tr.result, error: tr.error, })) : undefined, // 🔧 FIX: Include analytics and evaluation from generate result analytics: result?.analytics, evaluation: result?.evaluation, }; } catch (error) { logger.error(`Fake streaming fallback failed for ${this.providerName}:`, error); // Fire the consumer-supplied onError BEFORE re-throwing through // handleProviderError() so callers using onChunk/onFinish/onError // get notified even when fake-streaming setup (message build, image // adapter, etc.) fails synchronously. Awaited so the consumer's // async onError fully settles before we rethrow. The shared // lifecycle-fired WeakSet mark prevents double-fire if a wrapper // layer also handles this. await this.fireLifecycleErrorCallback(options, error); throw this.handleProviderError(error); } } /** * Apply per-call tool filtering (whitelist/blacklist) to a tools record. * If toolFilter is set, only tools whose names are in the list are kept. 
* If excludeTools is set, matching tools are removed. excludeTools is applied after toolFilter. */ applyToolFiltering(tools, options) { if ((!options.toolFilter || options.toolFilter.length === 0) && (!options.excludeTools || options.excludeTools.length === 0)) { return tools; } const beforeCount = Object.keys(tools).length; let filtered = { ...tools }; if (options.toolFilter && options.toolFilter.length > 0) { const allowSet = new Set(options.toolFilter); const result = {}; for (const [name, tool] of Object.entries(filtered)) { if (allowSet.has(name)) { result[name] = tool; } } filtered = result; } if (options.excludeTools && options.excludeTools.length > 0) { const denySet = new Set(options.excludeTools); for (const name of Object.keys(filtered)) { if (denySet.has(name)) { delete filtered[name]; } } } const afterCount = Object.keys(filtered).length; if (beforeCount !== afterCount) { logger.debug(`Tool filtering applied`, { provider: this.providerName, beforeCount, afterCount, toolFilter: options.toolFilter, excludeTools: options.excludeTools, }); } return filtered; } /** * Prepare generation context including tools and model */ async prepareGenerationContext(options) { const shouldUseTools = !options.disableTools && this.supportsTools(); const baseTools = shouldUseTools ? await this.getAllTools() : {}; let tools = shouldUseTools ? 
{ ...baseTools, ...(options.tools || {}), } : {}; // Apply per-call tool filtering (whitelist/blacklist) tools = this.applyToolFiltering(tools, options); logger.debug(`Final tools prepared for AI`, { provider: this.providerName, directTools: getKeyCount(baseTools), directToolNames: getKeysAsString(baseTools), externalTools: getKeyCount(options.tools || {}), externalToolNames: getKeysAsString(options.tools || {}), totalTools: getKeyCount(tools), totalToolNames: getKeysAsString(tools), shouldUseTools, timestamp: Date.now(), }); const model = await this.getAISDKModelWithMiddleware(options); return { tools, model }; } /** * Get merged tools for streaming: combines base tools (MCP/built-in) with * user-provided tools (e.g., RAG tools passed via options.tools). * * This is the canonical tool-merge pattern for executeStream() implementations. * All providers should call this instead of getAllTools() directly. */ async getToolsForStream(options) { const shouldUseTools = !options.disableTools && this.supportsTools(); if (!shouldUseTools) { return {}; } const baseTools = await this.getAllTools(); const externalTools = (options.tools || {}); let merged = { ...baseTools, ...externalTools }; // Apply per-call tool filtering (whitelist/blacklist) merged = this.applyToolFiltering(merged, options); logger.debug(`Tools prepared for streaming`, { provider: this.providerName, baseToolCount: Object.keys(baseTools).length, externalToolCount: Object.keys(externalTools).length, totalToolCount: Object.keys(merged).length, }); return merged; } /** * Build messages array for generation - delegated to MessageBuilder */ async buildMessages(options) { return this.messageBuilder.buildMessages(options); } /** * Build messages array for streaming operations - delegated to MessageBuilder * This is a protected helper method that providers can use to build messages * with automatic multimodal detection, eliminating code duplication * * @param options - Stream options or text generation options * 
@returns Promise resolving to ModelMessage array ready for AI SDK
     */
    async buildMessagesForStream(options) {
        return this.messageBuilder.buildMessagesForStream(options);
    }
    /**
     * Execute the generation with AI SDK - delegated to GenerationHandler
     *
     * All four arguments are forwarded unchanged, in the same order.
     *
     * @param model - Model to generate with
     * @param messages - Messages to send
     * @param tools - Tool map made available to the call
     * @param options - Generation options
     * @returns Whatever generationHandler.executeGeneration() returns
     */
    async executeGeneration(model, messages, tools, options) {
        return this.generationHandler.executeGeneration(model, messages, tools, options);
    }
    /**
     * Log generation completion information - delegated to GenerationHandler
     *
     * @param generateResult - Result object passed through to the handler
     */
    logGenerationComplete(generateResult) {
        this.generationHandler.logGenerationComplete(generateResult);
    }
    /**
     * Record performance metrics - delegated to TelemetryHandler
     *
     * Awaits the handler call; resolves with no value.
     *
     * @param usage - Usage data passed through to the handler
     * @param responseTime - Response time passed through to the handler
     */
    async recordPerformanceMetrics(usage, responseTime) {
        await this.telemetryHandler.recordPerformanceMetrics(usage, responseTime);
    }
    /**
     * Extract tool information from generation result - delegated to GenerationHandler
     *
     * @param generateResult - Result object passed through to the handler
     * @returns Whatever generationHandler.extractToolInformation() returns
     */
    extractToolInformation(generateResult) {
        return this.generationHandler.extractToolInformation(generateResult);
    }
    /**
     * Format the enhanced result - delegated to GenerationHandler
     *
     * All five arguments are forwarded unchanged, in the same order.
     *
     * @param generateResult - Raw generation result
     * @param tools - Tool map that was offered to the model
     * @param toolsUsed - Tool usage list
     * @param toolExecutions - Tool execution records
     * @param options - Generation options
     * @returns Whatever generationHandler.formatEnhancedResult() returns
     */
    formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options) {
        return this.generationHandler.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options);
    }
    /**
     * Analyze AI response structure and log detailed debugging information - delegated to GenerationHandler
     *
     * @param result - Result object passed through to the handler
     */
    analyzeAIResponse(result) {
        this.generationHandler.analyzeAIResponse(result);
    }
    /**
     * Text generation method - implements AIProvider interface
     * Tools are always available unless explicitly disabled
     *
     * Supports Text-to-Speech (TTS) audio generation in two modes:
     * 1. Direct synthesis (default): TTS synthesizes the input text without AI generation
     * 2. AI response synthesis: TTS synthesizes the AI-generated response after generation
     *
     * When TTS is enabled with useAiResponse=false (default), the method returns early with
     * only the audio result, skipping AI generation entirely for optimal performance.
* * When TTS is enabled with useAiResponse=true, the method performs full AI generation * and then synthesizes the AI response to audio. * * @param optionsOrPrompt - Generation options or prompt string * @param _analysisSchema - Optional analysis schema (not used) * @returns Enhanced result with optional audio field containing TTSResult * * IMPLEMENTATION NOTE: Uses streamText() under the hood and accumulates results * for consistency and better performance */ async generate(optionsOrPrompt, _analysisSchema) { const options = this.normalizeTextOptions(optionsOrPrompt); this.validateOptions(options); const startTime = Date.now(); // OTEL span for provider-level generate tracing // Use startActiveSpan pattern via context.with() so child spans become descendants const otelSpan = tracers.provider.startSpan("neurolink.provider.generate", { kind: SpanKind.CLIENT, attributes: { [ATTR.GEN_AI_SYSTEM]: this.providerName || "unknown", [ATTR.GEN_AI_MODEL]: this.modelName || options.model || "unknown", [ATTR.GEN_AI_OPERATION]: "generate", [ATTR.NL_PROVIDER]: this.providerName || "unknown", }, }); // Set this span as the active context so child spans (GenerationHandler, etc.) 
become descendants const activeCtx = trace.setSpan(context.active(), otelSpan); const otelSpanState = { ended: false }; return await context.with(activeCtx, async () => this.runGenerateInActiveContext(options, startTime, otelSpan, otelSpanState)); } /** * Alias for generate method - implements AIProvider interface */ async gen(optionsOrPrompt, analysisSchema) { return this.generate(optionsOrPrompt, analysisSchema); } async runGenerateInActiveContext(options, startTime, otelSpan, otelSpanState) { try { if (options.output?.mode === "video") { return await this.handleVideoGeneration(options, startTime); } const isImageModel = IMAGE_GENERATION_MODELS.some((m) => this.modelName.includes(m)); const requestsNonImageOutput = options.output?.format === "json" || options.output?.format === "structured" || options.output?.format === "text"; if (isImageModel && !requestsNonImageOutput) { logger.info(`Image generation model detected, routing to executeImageGeneration`, { provider: this.providerName, model: this.modelName, }); const imageResult = await this.executeImageGeneration(options); return await this.enhanceResult(imageResult, options, startTime); } if (options.tts?.enabled && !options.tts?.useAiResponse) { return this.handleDirectTTSSynthesis(options, startTime); } const { tools, model } = await this.prepareGenerationContext(options); const messages = await this.buildMessages(options); const videoFrameResult = await this.handleVideoFrameGeneration(options, messages, model, startTime); if (videoFrameResult) { return videoFrameResult; } return await this.executeStandardGenerateFlow(options, startTime, model, messages, tools); } catch (error) { otelSpan.setStatus({ code: SpanStatusCode.ERROR, message: error instanceof Error ? error.message : String(error), }); otelSpan.end(); otelSpanState.ended = true; if (isAbortError(error)) { logger.info(`Generate aborted for ${this.providerName}`, { error: error instanceof Error ? 
error.message : String(error), }); } else { logger.error(`Generate failed for ${this.providerName}:`, error); } throw this.handleProviderError(error); } finally { if (!otelSpanState.ended) { otelSpan.setStatus({ code: SpanStatusCode.OK }); otelSpan.end(); } } } async handleDirectTTSSynthesis(options, startTime) { const textToSynthesize = options.prompt ?? options.input?.text ?? ""; const baseResult = { content: textToSynthesize, provider: options.provider ?? this.providerName, model: this.modelName, usage: { input: 0, output: 0, total: 0 }, }; try { if (!options.tts) { return this.enhanceResult(baseResult, options, startTime); } baseResult.audio = await TTSProcessor.synthesize(textToSynthesize, options.tts.provider ?? options.provider ?? this.providerName, options.tts); } catch (ttsError) { logger.error(`TTS synthesis failed in Mode 1 (direct input synthesis):`, ttsError); } return this.enhanceResult(baseResult, options, startTime); } async handleVideoFrameGeneration(options, messages, model, startTime) { if (!hasVideoFrames(messages)) { return null; } // Bug 2 fix: callers requesting structured output (schema or explicit // output.format) must NOT be hijacked into the prose-returning video // analysis path. Without this gate, schema/format are silently dropped // whenever messages contain >=3 image parts. if (options.schema !== undefined || options.output?.format !== undefined) { logger.info("[VideoFrameGen] Skipping video-frame analysis route; caller requested structured output", { provider: this.providerName, model: this.modelName, hasSchema: options.schema !== undefined, outputFormat: options.output?.format, }); return null; } const videoAnalysisResult = await executeVideoAnalysis(messages, { provider: options.provider, providerName: this.providerName, region: options.region, }); const userText = messages .filter((m) => m.role === "user") .flatMap((m) => Array.isArray(m.content) ? 
m.content .filter((p) => p.type === "text") .map((p) => p.text) : [typeof m.content === "string" ? m.content : ""]) .filter(Boolean) .join("\n") .trim(); let formattedContent = videoAnalysisResult; let usage = { input: 0, output: 0, total: 0 }; if (options.systemPrompt) { try { const formattingPrompt = userText ? `The user asked: "${userText}"\n\nHere is the video/image analysis result from the visual analysis system:\n\n${videoAnalysisResult}\n\nBased on this analysis, provide your response.` : `Here is a video/image analysis result from the visual analysis system:\n\n${videoAnalysisResult}\n\nBased on this analysis, provide your response.`; logger.debug("[VideoAnalysis] Formatting via Claude", { userTextLength: userText.length, analysisLength: videoAnalysisResult.length, }); const formattedResult = await generateText({ model, system: options.systemPrompt, messages: [{ role: "user", content: formattingPrompt }], maxOutputTokens: options.maxTokens || 8192, temperature: 0.3, abortSignal: options.abortSignal, experimental_telemetry: this.telemetryHandler?.getTelemetryConfig(options, "generate"), }); formattedContent = formattedResult.text; usage = { input: formattedResult.usage?.inputTokens || 0, output: formattedResult.usage?.outputTokens || 0, total: (formattedResult.usage?.inputTokens || 0) + (formattedResult.usage?.outputTokens || 0), }; logger.debug("[VideoAnalysis] Claude formatting complete", { formattedLength: formattedContent.length, usage, }); } catch (error) { logger.warn("[VideoAnalysis] Claude formatting failed, using raw Gemini output", { error: error instanceof Error ? error.message : String(error), }); } } return this.enhanceResult({ content: formattedContent, provider: options.provider ?? this.providerName, model: this.modelName, usage, }, options, startTime); } async executeStandardGenerateFlow(options, startTime, model, messages, tools) { // Apply a defensive default timeout (3 min) when the caller didn't pass // one. 
Without this guard, AI SDK's generateText() will wait forever on // an upstream that accepts the connection but never produces a response // (observed against the litellm gateway when a request triggers the // team-access denial path — connection stays open, no response is sent, // and the matrix test hangs the entire suite). Callers can still pass // a larger value (e.g. video generation passes 10 min). const effectiveTimeout = options.timeout ?? 180_000; const timeoutController = createTimeoutController(effectiveTimeout, this.providerName, "generate"); const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal); const composedOptions = composedSignal ? { ...options, abortSignal: composedSignal } : options; let generateResult; try { generateResult = await this.executeGeneration(model, messages, tools, composedOptions); } finally { timeoutController?.cleanup(); } this.analyzeAIResponse(generateResult); this.logGenerationComplete(generateResult); const responseTime = Date.now() - startTime; await this.recordPerformanceMetrics(generateResult.usage, responseTime); const { toolsUsed, toolExecutions } = this.extractToolInformation(generateResult); let enhancedResult = this.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options); enhancedResult = await this.synthesizeAIResponseIfNeeded(enhancedResult, options); const finalResult = await this.enhanceResult(enhancedResult, options, startTime); return finalResult; } async synthesizeAIResponseIfNeeded(enhancedResult, options) { if (!options.tts?.enabled || !options.tts?.useAiResponse) { return enhancedResult; } const aiResponse = enhancedResult.content; const ttsProvider = options.tts?.provider ?? options.provider ?? this.providerName; if (!aiResponse || !ttsProvider) { logger.warn(`TTS synthesis skipped despite being enabled`, { provider: this.providerName, hasAiResponse: !!aiResponse, aiResponseLength: aiResponse?.length ?? 
0, hasProvider: !!ttsProvider, ttsConfig: { enabled: options.tts?.enabled, useAiResponse: options.tts?.useAiResponse, }, reason: !aiResponse ? "AI response is empty or undefined" : "Provider is missing", }); return enhancedResult; } try { const ttsResult = await TTSProcessor.synthesize(aiResponse, ttsProvider, options.tts); return { ...enhancedResult, audio: ttsResult, }; } catch (ttsError) { logger.error(`TTS synthesis failed in Mode 2 (AI response synthesis):`, ttsError); return enhancedResult; } } /** * BACKWARD COMPATIBILITY: Legacy generateText method * Converts EnhancedGenerateResult to TextGenerationResult format * Ensures existing scripts using createAIProvider().generateText() continue to work */ async generateText(options) { // Validate required parameters for backward compatibility - support both prompt and input.text const promptText = options.prompt || options.input?.text; if (!promptText || typeof promptText !== "string" || promptText.trim() === "") { throw new Error("GenerateText options must include prompt or input.text as a non-empty string"); } // Call the main generate method const result = await this.generate(options); if (!result) { throw new Error("Generation failed: No result returned"); } // Convert EnhancedGenerateResult to TextGenerationResult format return { content: result.content || "", provider: result.provider || this.providerName, model: result.model || this.modelName, usage: result.usage || { input: 0, output: 0, total: 0, }, responseTime: 0, // BaseProvider doesn't track response time directly toolsUsed: result.toolsUsed || [], // Map toolExecutions from EnhancedGenerateResult shape to TextGenerationResult shape // Preserve original timing/status fields when present, fall back to safe defaults toolExecutions: result.toolExecutions?.map((te) => { const t = te; return { // Spread original fields first so normalized fields take precedence ...te, toolName: te.name, executionTime: typeof t.executionTime === "number" ? 
t.executionTime : typeof t.duration === "number" ? t.duration : 0, success: typeof t.success === "boolean" ? t.success : t.status === undefined || t.status === "success", }; }), enhancedWithTools: !!(result.toolsUsed && result.toolsUsed.length > 0), analytics: result.analytics, evaluation: result.evaluation, audio: r