@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
994 lines • 79.1 kB
JavaScript
import { context, SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
import { generateText } from "ai";
import { directAgentTools } from "../agent/directTools.js";
import { IMAGE_GENERATION_MODELS } from "../core/constants.js";
import { MiddlewareFactory } from "../middleware/factory.js";
import { ATTR, tracers } from "../telemetry/index.js";
import { isAbortError } from "../utils/errorHandling.js";
import { hasLifecycleErrorFired, markLifecycleErrorFired, } from "../utils/lifecycleCallbacks.js";
import { resolveLifecycleTimeoutMs } from "../utils/lifecycleTimeout.js";
import { logger } from "../utils/logger.js";
import { withTimeoutFn } from "../utils/async/withTimeout.js";
import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
import { shouldDisableBuiltinTools } from "../utils/toolUtils.js";
import { getKeyCount, getKeysAsString } from "../utils/transformationUtils.js";
import { TTSProcessor } from "../utils/ttsProcessor.js";
import { executeVideoAnalysis, hasVideoFrames, } from "../utils/videoAnalysisProcessor.js";
import { GenerationHandler } from "./modules/GenerationHandler.js";
// Import modules for composition
import { MessageBuilder } from "./modules/MessageBuilder.js";
import { StreamHandler } from "./modules/StreamHandler.js";
import { TelemetryHandler } from "./modules/TelemetryHandler.js";
import { ToolsManager } from "./modules/ToolsManager.js";
import { Utilities } from "./modules/Utilities.js";
/**
* Abstract base class for all AI providers
* Tools are integrated as first-class citizens - always available by default
*/
export class BaseProvider {
// Model identifier for this provider instance. Assigned in the constructor
// from the `modelName` argument, or from getDefaultModel() when that is falsy.
// Not `readonly` because providers that auto-discover the model from a
// /v1/models endpoint (lm-studio, llamacpp) need to update modelName after
// construction so handlers (TelemetryHandler, MessageBuilder) cache the
// resolved name. All other providers treat this as effectively readonly.
modelName;
// Provider identifier. Assigned in the constructor from the `providerName`
// argument, or from getProviderName() when that is falsy.
providerName;
// Passed to the Utilities module when it is constructed.
defaultTimeout = 30000; // 30 seconds
// Holds the constructor's `middleware` argument; also passed to Utilities.
middlewareOptions; // TODO: Implement global level middlewares that can be used
// Tools are conditionally included based on centralized configuration:
// an empty record when shouldDisableBuiltinTools() is truthy, otherwise the
// built-in directAgentTools.
directTools = shouldDisableBuiltinTools()
? {}
: directAgentTools;
mcpTools; // MCP tools loaded dynamically when available
customTools; // Custom tools from registerTool()
toolExecutor; // Tool executor from setupToolExecutor
// NOTE(review): sessionId / userId are not assigned anywhere in the visible
// part of this class — presumably set by a later method; confirm.
sessionId;
userId;
neurolink; // Reference to actual NeuroLink instance for MCP tools
/** @internal Trace context propagated from NeuroLink SDK for span hierarchy */
// Starts as null and is replaced through setTraceContext().
_traceContext = null;
/**
 * Store the trace context on this provider instance.
 *
 * @param ctx - Value to store in `_traceContext`; kept as-is, no validation.
 */
setTraceContext(ctx) {
this._traceContext = ctx;
}
// Composition modules - Single Responsibility Principle
// Handlers below are not `readonly` so that providers which auto-discover
// their model after construction (lm-studio, llamacpp) can rebuild them
// via `refreshHandlersForModel(...)` and propagate the resolved name into
// pricing / telemetry / span attributes. All other providers leave these
// alone.
// MessageBuilder(providerName, modelName) — backs buildMessages() and
// buildMessagesForStream().
messageBuilder;
// StreamHandler(providerName, modelName).
streamHandler;
// GenerationHandler — backs executeGeneration(), extractToolInformation(),
// formatEnhancedResult(), logGenerationComplete() and analyzeAIResponse().
generationHandler;
// TelemetryHandler(providerName, modelName, neurolink) — backs
// recordPerformanceMetrics() and supplies telemetry config to generation.
telemetryHandler;
// Utilities(providerName, modelName, defaultTimeout, middlewareOptions).
utilities;
// ToolsManager(providerName, directTools, neurolink, schema/result helpers).
// Built once in the constructor; refreshHandlersForModel() does not rebuild it.
toolsManager;
constructor(modelName, providerName, neurolink, middleware) {
this.modelName = modelName || this.getDefaultModel();
this.providerName = providerName || this.getProviderName();
this.neurolink = neurolink;
this.middlewareOptions = middleware;
// Initialize composition modules
this.messageBuilder = new MessageBuilder(this.providerName, this.modelName);
this.streamHandler = new StreamHandler(this.providerName, this.modelName);
this.telemetryHandler = new TelemetryHandler(this.providerName, this.modelName, this.neurolink);
this.generationHandler = new GenerationHandler(this.providerName, this.modelName, () => this.supportsTools(), (options, type) => this.telemetryHandler.getTelemetryConfig(options, type), (toolCalls, toolResults, options, timestamp) => this.handleToolExecutionStorage(toolCalls, toolResults, options, timestamp), () => this.neurolink?.getEventEmitter());
this.utilities = new Utilities(this.providerName, this.modelName, this.defaultTimeout, this.middlewareOptions);
this.toolsManager = new ToolsManager(this.providerName, this.directTools, this.neurolink, {
isZodSchema: (schema) => this.isZodSchema(schema),
convertToolResult: (result) => this.convertToolResult(result),
createPermissiveZodSchema: () => this.createPermissiveZodSchema(),
fixSchemaForOpenAIStrictMode: (schema) => this.fixSchemaForOpenAIStrictMode(schema),
});
}
/**
* Update modelName and rebuild composition handlers with the new value.
*
* Auto-discovery providers (lm-studio, llamacpp) call this once they have
* resolved the loaded model from `/v1/models`. Without this, handlers
* (TelemetryHandler, MessageBuilder, ...) keep the pre-discovery name and
* pricing / span / log metadata reports the stale value.
*/
refreshHandlersForModel(model) {
this.modelName = model;
trace
.getSpan(context.active())
?.setAttribute(ATTR.GEN_AI_MODEL, this.modelName);
this.messageBuilder = new MessageBuilder(this.providerName, this.modelName);
this.streamHandler = new StreamHandler(this.providerName, this.modelName);
this.telemetryHandler = new TelemetryHandler(this.providerName, this.modelName, this.neurolink);
this.generationHandler = new GenerationHandler(this.providerName, this.modelName, () => this.supportsTools(), (options, type) => this.telemetryHandler.getTelemetryConfig(options, type), (toolCalls, toolResults, options, timestamp) => this.handleToolExecutionStorage(toolCalls, toolResults, options, timestamp), () => this.neurolink?.getEventEmitter());
this.utilities = new Utilities(this.providerName, this.modelName, this.defaultTimeout, this.middlewareOptions);
}
/**
* Check if this provider supports tool/function calling.
* Override in subclasses to disable tools for specific providers or models.
*
* Within this class the result gates tool merging in stream(),
* prepareGenerationContext() and getToolsForStream(), and the
* fake-streaming fallback in stream().
*
* @returns true by default, providers can override to return false
*/
supportsTools() {
return true;
}
// ===================
// PUBLIC API METHODS
// ===================
/**
* Primary streaming method - implements AIProvider interface
* When tools are involved, falls back to generate() with synthetic streaming
*/
async stream(optionsOrPrompt, analysisSchema) {
let options = this.normalizeStreamOptions(optionsOrPrompt);
logger.info(`Starting stream`, {
provider: this.providerName,
hasTools: !options.disableTools && this.supportsTools(),
disableTools: !!options.disableTools,
supportsTools: this.supportsTools(),
inputLength: options.input?.text?.length || 0,
maxTokens: options.maxTokens,
temperature: options.temperature,
timestamp: Date.now(),
});
// ===== EARLY MULTIMODAL DETECTION =====
const hasFileInput = !!options.input?.files?.length || !!options.input?.videoFiles?.length;
if (hasFileInput) {
// ===== VIDEO ANALYSIS DETECTION =====
// Check if video frames are present and handle with fake streaming
const messages = await this.buildMessagesForStream(options);
if (hasVideoFrames(messages)) {
logger.info(`Video frames detected in stream, using fake streaming for video analysis`, {
provider: this.providerName,
model: this.modelName,
});
// Note: executeFakeStreaming() owns its own catch that fires the
// consumer-supplied onError before re-throwing through
// handleProviderError(), so we do not need to wrap again here —
// doing so would route the error through handleProviderError()
// twice (and risk a double-fire onError without the shared
// lifecycle-fired WeakSet mark).
const fakeResult = await this.executeFakeStreaming(options, analysisSchema);
return this.wrapStreamWithLifecycleCallbacks(fakeResult, options);
}
}
// CRITICAL: Image generation models don't support real streaming
// Force fake streaming for image models to ensure image output is yielded.
// Skip this path when the caller explicitly requests non-image output (e.g.
// JSON analysis) so dual-mode models like gemini-3.1-flash-image-preview
// can still perform text/structured generation.
const isImageModel = IMAGE_GENERATION_MODELS.some((m) => this.modelName.includes(m));
const requestsNonImageOutput = options.output?.format === "json" ||
options.output?.format === "structured" ||
options.output?.format === "text";
if (isImageModel && !requestsNonImageOutput) {
logger.info(`Image model detected, forcing fake streaming`, {
provider: this.providerName,
model: this.modelName,
reason: "Image generation requires fake streaming to yield image output",
});
// Skip real streaming, go directly to fake streaming.
// executeFakeStreaming() owns its own catch + lifecycle fire, so
// wrapping again here would double-route through handleProviderError().
const fakeResult = await this.executeFakeStreaming(options, analysisSchema);
return this.wrapStreamWithLifecycleCallbacks(fakeResult, options);
}
// Central tool merge: Pre-merge base tools (MCP/built-in) with user-provided
// tools (e.g. RAG tools) into options.tools. This way, every provider's
// executeStream() can simply use options.tools (or getAllTools() + options.tools)
// and get the complete tool set without needing per-provider merge logic.
if (!options.disableTools && this.supportsTools()) {
const mergedTools = await this.getToolsForStream(options);
options = { ...options, tools: mergedTools };
}
else {
options = { ...options, tools: {} };
}
// CRITICAL FIX: Always prefer real streaming over fake streaming
// Try real streaming first, use fake streaming only as fallback
try {
logger.debug(`Attempting real streaming`, {
provider: this.providerName,
timestamp: Date.now(),
});
const realStreamResult = await this.executeStream(options, analysisSchema);
logger.info(`Real streaming succeeded`, {
provider: this.providerName,
timestamp: Date.now(),
});
// Wire lifecycle callbacks (onChunk/onFinish/onError) on the user-
// facing StreamResult.stream. The AI-SDK lifecycle middleware only
// sees AI-SDK-internal chunks via streamText/wrapLanguageModel, so
// providers with custom HTTP streaming (Ollama, llama.cpp's /api,
// anything that doesn't go through streamText) bypass it. Wrapping
// here makes the callbacks fire for every provider, regardless of
// streaming implementation.
return this.wrapStreamWithLifecycleCallbacks(realStreamResult, options);
}
catch (realStreamError) {
// Don't retry on terminal/abort errors — only fall back for
// "real streaming with tools is unsupported" style failures.
const errMsg = realStreamError instanceof Error
? realStreamError.message
: String(realStreamError);
const errName = realStreamError instanceof Error ? realStreamError.name : "";
if (errName === "AbortError" ||
errMsg.includes("abort") ||
errMsg.includes("timeout") ||
errMsg.includes("401") ||
errMsg.includes("403") ||
errMsg.includes("quota") ||
errMsg.includes("rate limit") ||
errMsg.includes("authentication")) {
await this.fireLifecycleErrorCallback(options, realStreamError);
throw this.handleProviderError(realStreamError);
}
logger.warn(`Real streaming failed for ${this.providerName}, falling back to fake streaming:`, {
error: errMsg,
timestamp: Date.now(),
});
// Fallback to fake streaming only if real streaming fails AND tools
// are enabled. executeFakeStreaming() owns its own catch + lifecycle
// fire, so a fake-streaming failure here surfaces through that path
// without needing an outer wrap (which would double-route through
// handleProviderError()).
if (!options.disableTools && this.supportsTools()) {
const fakeResult = await this.executeFakeStreaming(options, analysisSchema);
return this.wrapStreamWithLifecycleCallbacks(fakeResult, options);
}
else {
// If real streaming failed and no tools are enabled, fire onError
// before re-throwing so consumer-supplied callbacks see the failure.
await this.fireLifecycleErrorCallback(options, realStreamError);
// If real streaming failed and no tools are enabled, re-throw the original error
logger.error(`Real streaming failed for ${this.providerName}:`, realStreamError);
throw this.handleProviderError(realStreamError);
}
}
}
/**
* Wrap a StreamResult with consumer-facing lifecycle callbacks.
*
* `options.onChunk`, `options.onFinish`, `options.onError` are translated
* by NeuroLink.applyStreamLifecycleMiddleware() into
* `options.middleware.middlewareConfig.lifecycle.config`. The AI SDK's
* lifecycle middleware only sees these via the wrapped LanguageModel —
* which is bypassed by providers that stream via raw HTTP fetch (Ollama
* over /api/chat, custom OpenAI-compatible servers, etc). Wrapping the
* user-facing stream here ensures the callbacks fire regardless of the
* underlying transport.
*/
wrapStreamWithLifecycleCallbacks(result, options) {
const lifecycle = options
?.middleware?.middlewareConfig?.lifecycle?.config;
if (!lifecycle?.onChunk && !lifecycle?.onFinish && !lifecycle?.onError) {
return result;
}
const { onChunk, onFinish, onError } = lifecycle;
const startTime = Date.now();
const originalStream = result.stream;
// Lifecycle callbacks are awaited with a bounded deadline so callers
// observe ordering guarantees (onChunk/onFinish/onError have all
// settled by the time `for await` returns / throws). The previous
// fire-and-forget pattern left async work running past stream close,
// creating races during cleanup. The deadline is configurable via
// `lifecycle.timeoutMs` (per-call) or `NEUROLINK_LIFECYCLE_TIMEOUT_MS`
// (env / CLI surface) — see `resolveLifecycleTimeoutMs`.
const timeoutMs = resolveLifecycleTimeoutMs(lifecycle);
const safeFire = async (fn, label) => {
try {
await withTimeoutFn(async () => {
const ret = fn();
if (ret && typeof ret.then === "function") {
await ret;
}
}, timeoutMs, `[lifecycle] ${label} callback exceeded ${timeoutMs}ms`);
}
catch (e) {
logger.warn(`[lifecycle] ${label} callback error:`, e);
}
};
const wrappedStream = (async function* () {
let accumulated = "";
let seq = 0;
try {
for await (const chunk of originalStream) {
const textPart = chunk &&
typeof chunk === "object" &&
"content" in chunk &&
typeof chunk.content === "string"
? chunk.content
: "";
// Only fire onChunk for actual text deltas. Non-text chunks
// (image, tts_audio) would otherwise produce empty text-delta
// events that consumers must filter out themselves.
if (onChunk && textPart) {
const currentSeq = seq++;
await safeFire(() => onChunk({
type: "text-delta",
textDelta: textPart,
sequenceNumber: currentSeq,
}), "onChunk");
}
if (textPart) {
accumulated += textPart;
}
yield chunk;
}
if (onFinish) {
await safeFire(() => onFinish({
text: accumulated,
duration: Date.now() - startTime,
}), "onFinish");
}
}
catch (error) {
const err = error instanceof Error ? error : new Error(String(error));
if (onError && !hasLifecycleErrorFired(err)) {
// Mark before firing so a higher layer that also routes through
// fireLifecycleErrorCallback (or its own lifecycle wrapper) with
// the same error instance won't double-fire onError. Mirrors the
// pattern in fireLifecycleErrorCallback below.
markLifecycleErrorFired(err);
await safeFire(() => onError({
error: err,
duration: Date.now() - startTime,
recoverable: false,
}), "onError");
}
throw err;
}
})();
return { ...result, stream: wrappedStream };
}
/**
 * Notify the consumer-supplied `onError` callback about a failure that is
 * about to be re-thrown without any stream chunk having been emitted.
 *
 * Does nothing when the error is already marked as reported
 * (hasLifecycleErrorFired) or when no `onError` is configured under
 * `options.middleware.middlewareConfig.lifecycle.config`. The callback is
 * awaited under the deadline from resolveLifecycleTimeoutMs(); a failing or
 * timed-out callback is logged and never propagates.
 *
 * @param options - Stream options that may carry the lifecycle config.
 * @param error - The failure; non-Error values are wrapped in an Error.
 * @returns Promise that resolves once the callback settled or timed out.
 */
async fireLifecycleErrorCallback(options, error) {
    const err = error instanceof Error ? error : new Error(String(error));
    // Already surfaced elsewhere: skip so the consumer is not notified twice.
    if (hasLifecycleErrorFired(err)) {
        return;
    }
    const lifecycle = options?.middleware?.middlewareConfig?.lifecycle?.config;
    // Captured once so the callback identity stays stable across the timeout
    // boundary even if the caller mutates `lifecycle.onError` meanwhile.
    const onError = lifecycle?.onError;
    if (!onError) {
        return;
    }
    // Mark first, so a re-entrant or concurrent path cannot double-fire.
    markLifecycleErrorFired(err);
    const timeoutMs = resolveLifecycleTimeoutMs(lifecycle);

    const notifyConsumer = async () => {
        const pending = onError({
            error: err,
            duration: 0,
            recoverable: false,
        });
        if (pending && typeof pending.then === "function") {
            await pending;
        }
    };

    try {
        await withTimeoutFn(notifyConsumer, timeoutMs, `[lifecycle] onError callback exceeded ${timeoutMs}ms`);
    }
    catch (callbackError) {
        logger.warn("[lifecycle] onError callback error:", callbackError);
    }
}
/**
* Execute fake streaming - extracted method for reusability
*/
async executeFakeStreaming(options, analysisSchema) {
try {
logger.info(`Starting fake streaming with tools`, {
provider: this.providerName,
supportsTools: this.supportsTools(),
timestamp: Date.now(),
});
// Convert stream options to text generation options
const textOptions = {
prompt: options.input?.text || "",
input: options.input,
systemPrompt: options.systemPrompt,
temperature: options.temperature,
maxTokens: options.maxTokens,
tools: options.tools, // 🔧 FIX: Pass user-provided tools (including RAG tools) to generation pipeline
disableTools: !!options.disableTools,
maxSteps: options.maxSteps || 5,
provider: options.provider,
model: options.model,
region: options.region, // Pass region for Vertex AI
// 🔧 FIX: Include analytics and evaluation options from stream options
enableAnalytics: options.enableAnalytics,
enableEvaluation: options.enableEvaluation,
evaluationDomain: options.evaluationDomain,
toolUsageContext: options.toolUsageContext,
context: options.context,
csvOptions: options.csvOptions,
// Forward abort, tool filtering, and timeout options to prevent
// silent bypass when falling back from real streaming to fake streaming
abortSignal: options.abortSignal,
toolFilter: options.toolFilter,
excludeTools: options.excludeTools,
skipToolPromptInjection: options.skipToolPromptInjection,
timeout: options.timeout,
stt: options.stt,
// Forward TTS options too — without this, the fake-streaming fallback
// path silently drops `tts` and the resulting StreamResult never
// produces a `tts_audio` chunk even when synthesis was requested.
tts: options.tts,
};
logger.debug(`Calling generate for fake streaming`, {
provider: this.providerName,
maxSteps: textOptions.maxSteps,
disableTools: textOptions.disableTools,
timestamp: Date.now(),
});
const result = await this.generate(textOptions, analysisSchema);
logger.info(`Generate completed for fake streaming`, {
provider: this.providerName,
hasContent: !!result?.content,
contentLength: result?.content?.length || 0,
toolsUsed: result?.toolsUsed?.length || 0,
hasImageOutput: !!result?.imageOutput,
timestamp: Date.now(),
});
// Create a synthetic stream from the generate result that simulates progressive delivery
return {
stream: (async function* () {
if (result?.content) {
// Split content into words for more natural streaming
const words = result.content.split(/(\s+)/); // Keep whitespace
let buffer = "";
for (let i = 0; i < words.length; i++) {
buffer += words[i];
// Yield chunks of roughly 5-10 words or at punctuation
const shouldYield = i === words.length - 1 || // Last word
buffer.length > 50 || // Buffer getting long
/[.!?;,]\s*$/.test(buffer); // End of sentence/clause
if (shouldYield && buffer.trim()) {
yield { content: buffer };
buffer = "";
// Small delay to simulate streaming (1-10ms)
await new Promise((resolve) => {
setTimeout(resolve, Math.random() * 9 + 1);
});
}
}
// Yield all remaining content
if (buffer.trim()) {
yield { content: buffer };
}
}
// 🔧 CRITICAL FIX: Yield image output if present
if (result?.imageOutput) {
yield {
type: "image",
imageOutput: result.imageOutput,
};
}
// Yield synthesized audio so callers using stream() with tts.enabled
// still receive a tts_audio chunk on the fake-streaming fallback
// path (matches the discriminator used by the real streaming path).
if (result?.audio) {
yield {
type: "tts_audio",
audio: {
data: result.audio.buffer,
format: result.audio.format,
index: 0,
isFinal: true,
cumulativeSize: result.audio.size,
voice: result.audio.voice,
sampleRate: result.audio.sampleRate,
},
};
}
})(),
usage: result?.usage,
provider: result?.provider,
model: result?.model,
toolCalls: result?.toolCalls?.map((call) => ({
toolName: call.toolName,
parameters: call.args,
id: call.toolCallId,
})),
toolResults: result?.toolResults
? result.toolResults.map((tr) => ({
toolName: tr.toolName || "unknown",
status: (tr.status === "error"
? "failure"
: "success"),
result: tr.output ?? tr.result,
error: tr.error,
}))
: undefined,
// 🔧 FIX: Include analytics and evaluation from generate result
analytics: result?.analytics,
evaluation: result?.evaluation,
};
}
catch (error) {
logger.error(`Fake streaming fallback failed for ${this.providerName}:`, error);
// Fire the consumer-supplied onError BEFORE re-throwing through
// handleProviderError() so callers using onChunk/onFinish/onError
// get notified even when fake-streaming setup (message build, image
// adapter, etc.) fails synchronously. Awaited so the consumer's
// async onError fully settles before we rethrow. The shared
// lifecycle-fired WeakSet mark prevents double-fire if a wrapper
// layer also handles this.
await this.fireLifecycleErrorCallback(options, error);
throw this.handleProviderError(error);
}
}
/**
* Apply per-call tool filtering (whitelist/blacklist) to a tools record.
* If toolFilter is set, only tools whose names are in the list are kept.
* If excludeTools is set, matching tools are removed. excludeTools is applied after toolFilter.
*/
applyToolFiltering(tools, options) {
if ((!options.toolFilter || options.toolFilter.length === 0) &&
(!options.excludeTools || options.excludeTools.length === 0)) {
return tools;
}
const beforeCount = Object.keys(tools).length;
let filtered = { ...tools };
if (options.toolFilter && options.toolFilter.length > 0) {
const allowSet = new Set(options.toolFilter);
const result = {};
for (const [name, tool] of Object.entries(filtered)) {
if (allowSet.has(name)) {
result[name] = tool;
}
}
filtered = result;
}
if (options.excludeTools && options.excludeTools.length > 0) {
const denySet = new Set(options.excludeTools);
for (const name of Object.keys(filtered)) {
if (denySet.has(name)) {
delete filtered[name];
}
}
}
const afterCount = Object.keys(filtered).length;
if (beforeCount !== afterCount) {
logger.debug(`Tool filtering applied`, {
provider: this.providerName,
beforeCount,
afterCount,
toolFilter: options.toolFilter,
excludeTools: options.excludeTools,
});
}
return filtered;
}
/**
* Prepare generation context including tools and model
*/
async prepareGenerationContext(options) {
const shouldUseTools = !options.disableTools && this.supportsTools();
const baseTools = shouldUseTools ? await this.getAllTools() : {};
let tools = shouldUseTools
? {
...baseTools,
...(options.tools || {}),
}
: {};
// Apply per-call tool filtering (whitelist/blacklist)
tools = this.applyToolFiltering(tools, options);
logger.debug(`Final tools prepared for AI`, {
provider: this.providerName,
directTools: getKeyCount(baseTools),
directToolNames: getKeysAsString(baseTools),
externalTools: getKeyCount(options.tools || {}),
externalToolNames: getKeysAsString(options.tools || {}),
totalTools: getKeyCount(tools),
totalToolNames: getKeysAsString(tools),
shouldUseTools,
timestamp: Date.now(),
});
const model = await this.getAISDKModelWithMiddleware(options);
return { tools, model };
}
/**
* Get merged tools for streaming: combines base tools (MCP/built-in) with
* user-provided tools (e.g., RAG tools passed via options.tools).
*
* This is the canonical tool-merge pattern for executeStream() implementations.
* All providers should call this instead of getAllTools() directly.
*/
async getToolsForStream(options) {
const shouldUseTools = !options.disableTools && this.supportsTools();
if (!shouldUseTools) {
return {};
}
const baseTools = await this.getAllTools();
const externalTools = (options.tools || {});
let merged = { ...baseTools, ...externalTools };
// Apply per-call tool filtering (whitelist/blacklist)
merged = this.applyToolFiltering(merged, options);
logger.debug(`Tools prepared for streaming`, {
provider: this.providerName,
baseToolCount: Object.keys(baseTools).length,
externalToolCount: Object.keys(externalTools).length,
totalToolCount: Object.keys(merged).length,
});
return merged;
}
/**
* Build messages array for generation - delegated to MessageBuilder
*/
async buildMessages(options) {
return this.messageBuilder.buildMessages(options);
}
/**
* Build messages array for streaming operations - delegated to MessageBuilder
* This is a protected helper method that providers can use to build messages
* with automatic multimodal detection, eliminating code duplication
*
* @param options - Stream options or text generation options
* @returns Promise resolving to ModelMessage array ready for AI SDK
*/
async buildMessagesForStream(options) {
return this.messageBuilder.buildMessagesForStream(options);
}
/**
* Execute the generation with AI SDK - delegated to GenerationHandler
*/
async executeGeneration(model, messages, tools, options) {
return this.generationHandler.executeGeneration(model, messages, tools, options);
}
/**
* Log generation completion information - delegated to GenerationHandler
*/
logGenerationComplete(generateResult) {
this.generationHandler.logGenerationComplete(generateResult);
}
/**
* Record performance metrics - delegated to TelemetryHandler
*/
async recordPerformanceMetrics(usage, responseTime) {
await this.telemetryHandler.recordPerformanceMetrics(usage, responseTime);
}
/**
* Extract tool information from generation result - delegated to GenerationHandler
*/
extractToolInformation(generateResult) {
return this.generationHandler.extractToolInformation(generateResult);
}
/**
* Format the enhanced result - delegated to GenerationHandler
*/
formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options) {
return this.generationHandler.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options);
}
/**
* Analyze AI response structure and log detailed debugging information - delegated to GenerationHandler
*/
analyzeAIResponse(result) {
this.generationHandler.analyzeAIResponse(result);
}
/**
* Text generation method - implements AIProvider interface
* Tools are always available unless explicitly disabled
*
* Supports Text-to-Speech (TTS) audio generation in two modes:
* 1. Direct synthesis (default): TTS synthesizes the input text without AI generation
* 2. AI response synthesis: TTS synthesizes the AI-generated response after generation
*
* When TTS is enabled with useAiResponse=false (default), the method returns early with
* only the audio result, skipping AI generation entirely for optimal performance.
*
* When TTS is enabled with useAiResponse=true, the method performs full AI generation
* and then synthesizes the AI response to audio.
*
* @param optionsOrPrompt - Generation options or prompt string
* @param _analysisSchema - Optional analysis schema (not used)
* @returns Enhanced result with optional audio field containing TTSResult
*
* IMPLEMENTATION NOTE: Uses streamText() under the hood and accumulates results
* for consistency and better performance
*/
async generate(optionsOrPrompt, _analysisSchema) {
const options = this.normalizeTextOptions(optionsOrPrompt);
this.validateOptions(options);
const startTime = Date.now();
// OTEL span for provider-level generate tracing
// Use startActiveSpan pattern via context.with() so child spans become descendants
const otelSpan = tracers.provider.startSpan("neurolink.provider.generate", {
kind: SpanKind.CLIENT,
attributes: {
[ATTR.GEN_AI_SYSTEM]: this.providerName || "unknown",
[ATTR.GEN_AI_MODEL]: this.modelName || options.model || "unknown",
[ATTR.GEN_AI_OPERATION]: "generate",
[ATTR.NL_PROVIDER]: this.providerName || "unknown",
},
});
// Set this span as the active context so child spans (GenerationHandler, etc.) become descendants
const activeCtx = trace.setSpan(context.active(), otelSpan);
const otelSpanState = { ended: false };
return await context.with(activeCtx, async () => this.runGenerateInActiveContext(options, startTime, otelSpan, otelSpanState));
}
/**
* Alias for generate method - implements AIProvider interface
*/
async gen(optionsOrPrompt, analysisSchema) {
return this.generate(optionsOrPrompt, analysisSchema);
}
async runGenerateInActiveContext(options, startTime, otelSpan, otelSpanState) {
try {
if (options.output?.mode === "video") {
return await this.handleVideoGeneration(options, startTime);
}
const isImageModel = IMAGE_GENERATION_MODELS.some((m) => this.modelName.includes(m));
const requestsNonImageOutput = options.output?.format === "json" ||
options.output?.format === "structured" ||
options.output?.format === "text";
if (isImageModel && !requestsNonImageOutput) {
logger.info(`Image generation model detected, routing to executeImageGeneration`, {
provider: this.providerName,
model: this.modelName,
});
const imageResult = await this.executeImageGeneration(options);
return await this.enhanceResult(imageResult, options, startTime);
}
if (options.tts?.enabled && !options.tts?.useAiResponse) {
return this.handleDirectTTSSynthesis(options, startTime);
}
const { tools, model } = await this.prepareGenerationContext(options);
const messages = await this.buildMessages(options);
const videoFrameResult = await this.handleVideoFrameGeneration(options, messages, model, startTime);
if (videoFrameResult) {
return videoFrameResult;
}
return await this.executeStandardGenerateFlow(options, startTime, model, messages, tools);
}
catch (error) {
otelSpan.setStatus({
code: SpanStatusCode.ERROR,
message: error instanceof Error ? error.message : String(error),
});
otelSpan.end();
otelSpanState.ended = true;
if (isAbortError(error)) {
logger.info(`Generate aborted for ${this.providerName}`, {
error: error instanceof Error ? error.message : String(error),
});
}
else {
logger.error(`Generate failed for ${this.providerName}:`, error);
}
throw this.handleProviderError(error);
}
finally {
if (!otelSpanState.ended) {
otelSpan.setStatus({ code: SpanStatusCode.OK });
otelSpan.end();
}
}
}
async handleDirectTTSSynthesis(options, startTime) {
const textToSynthesize = options.prompt ?? options.input?.text ?? "";
const baseResult = {
content: textToSynthesize,
provider: options.provider ?? this.providerName,
model: this.modelName,
usage: { input: 0, output: 0, total: 0 },
};
try {
if (!options.tts) {
return this.enhanceResult(baseResult, options, startTime);
}
baseResult.audio = await TTSProcessor.synthesize(textToSynthesize, options.tts.provider ?? options.provider ?? this.providerName, options.tts);
}
catch (ttsError) {
logger.error(`TTS synthesis failed in Mode 1 (direct input synthesis):`, ttsError);
}
return this.enhanceResult(baseResult, options, startTime);
}
async handleVideoFrameGeneration(options, messages, model, startTime) {
if (!hasVideoFrames(messages)) {
return null;
}
// Bug 2 fix: callers requesting structured output (schema or explicit
// output.format) must NOT be hijacked into the prose-returning video
// analysis path. Without this gate, schema/format are silently dropped
// whenever messages contain >=3 image parts.
if (options.schema !== undefined || options.output?.format !== undefined) {
logger.info("[VideoFrameGen] Skipping video-frame analysis route; caller requested structured output", {
provider: this.providerName,
model: this.modelName,
hasSchema: options.schema !== undefined,
outputFormat: options.output?.format,
});
return null;
}
const videoAnalysisResult = await executeVideoAnalysis(messages, {
provider: options.provider,
providerName: this.providerName,
region: options.region,
});
const userText = messages
.filter((m) => m.role === "user")
.flatMap((m) => Array.isArray(m.content)
? m.content
.filter((p) => p.type === "text")
.map((p) => p.text)
: [typeof m.content === "string" ? m.content : ""])
.filter(Boolean)
.join("\n")
.trim();
let formattedContent = videoAnalysisResult;
let usage = { input: 0, output: 0, total: 0 };
if (options.systemPrompt) {
try {
const formattingPrompt = userText
? `The user asked: "${userText}"\n\nHere is the video/image analysis result from the visual analysis system:\n\n${videoAnalysisResult}\n\nBased on this analysis, provide your response.`
: `Here is a video/image analysis result from the visual analysis system:\n\n${videoAnalysisResult}\n\nBased on this analysis, provide your response.`;
logger.debug("[VideoAnalysis] Formatting via Claude", {
userTextLength: userText.length,
analysisLength: videoAnalysisResult.length,
});
const formattedResult = await generateText({
model,
system: options.systemPrompt,
messages: [{ role: "user", content: formattingPrompt }],
maxOutputTokens: options.maxTokens || 8192,
temperature: 0.3,
abortSignal: options.abortSignal,
experimental_telemetry: this.telemetryHandler?.getTelemetryConfig(options, "generate"),
});
formattedContent = formattedResult.text;
usage = {
input: formattedResult.usage?.inputTokens || 0,
output: formattedResult.usage?.outputTokens || 0,
total: (formattedResult.usage?.inputTokens || 0) +
(formattedResult.usage?.outputTokens || 0),
};
logger.debug("[VideoAnalysis] Claude formatting complete", {
formattedLength: formattedContent.length,
usage,
});
}
catch (error) {
logger.warn("[VideoAnalysis] Claude formatting failed, using raw Gemini output", {
error: error instanceof Error ? error.message : String(error),
});
}
}
return this.enhanceResult({
content: formattedContent,
provider: options.provider ?? this.providerName,
model: this.modelName,
usage,
}, options, startTime);
}
async executeStandardGenerateFlow(options, startTime, model, messages, tools) {
// Apply a defensive default timeout (3 min) when the caller didn't pass
// one. Without this guard, AI SDK's generateText() will wait forever on
// an upstream that accepts the connection but never produces a response
// (observed against the litellm gateway when a request triggers the
// team-access denial path — connection stays open, no response is sent,
// and the matrix test hangs the entire suite). Callers can still pass
// a larger value (e.g. video generation passes 10 min).
const effectiveTimeout = options.timeout ?? 180_000;
const timeoutController = createTimeoutController(effectiveTimeout, this.providerName, "generate");
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
const composedOptions = composedSignal
? { ...options, abortSignal: composedSignal }
: options;
let generateResult;
try {
generateResult = await this.executeGeneration(model, messages, tools, composedOptions);
}
finally {
timeoutController?.cleanup();
}
this.analyzeAIResponse(generateResult);
this.logGenerationComplete(generateResult);
const responseTime = Date.now() - startTime;
await this.recordPerformanceMetrics(generateResult.usage, responseTime);
const { toolsUsed, toolExecutions } = this.extractToolInformation(generateResult);
let enhancedResult = this.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options);
enhancedResult = await this.synthesizeAIResponseIfNeeded(enhancedResult, options);
const finalResult = await this.enhanceResult(enhancedResult, options, startTime);
return finalResult;
}
async synthesizeAIResponseIfNeeded(enhancedResult, options) {
if (!options.tts?.enabled || !options.tts?.useAiResponse) {
return enhancedResult;
}
const aiResponse = enhancedResult.content;
const ttsProvider = options.tts?.provider ?? options.provider ?? this.providerName;
if (!aiResponse || !ttsProvider) {
logger.warn(`TTS synthesis skipped despite being enabled`, {
provider: this.providerName,
hasAiResponse: !!aiResponse,
aiResponseLength: aiResponse?.length ?? 0,
hasProvider: !!ttsProvider,
ttsConfig: {
enabled: options.tts?.enabled,
useAiResponse: options.tts?.useAiResponse,
},
reason: !aiResponse
? "AI response is empty or undefined"
: "Provider is missing",
});
return enhancedResult;
}
try {
const ttsResult = await TTSProcessor.synthesize(aiResponse, ttsProvider, options.tts);
return {
...enhancedResult,
audio: ttsResult,
};
}
catch (ttsError) {
logger.error(`TTS synthesis failed in Mode 2 (AI response synthesis):`, ttsError);
return enhancedResult;
}
}
/**
* BACKWARD COMPATIBILITY: Legacy generateText method
* Converts EnhancedGenerateResult to TextGenerationResult format
* Ensures existing scripts using createAIProvider().generateText() continue to work
*/
async generateText(options) {
// Validate required parameters for backward compatibility - support both prompt and input.text
const promptText = options.prompt || options.input?.text;
if (!promptText ||
typeof promptText !== "string" ||
promptText.trim() === "") {
throw new Error("GenerateText options must include prompt or input.text as a non-empty string");
}
// Call the main generate method
const result = await this.generate(options);
if (!result) {
throw new Error("Generation failed: No result returned");
}
// Convert EnhancedGenerateResult to TextGenerationResult format
return {
content: result.content || "",
provider: result.provider || this.providerName,
model: result.model || this.modelName,
usage: result.usage || {
input: 0,
output: 0,
total: 0,
},
responseTime: 0, // BaseProvider doesn't track response time directly
toolsUsed: result.toolsUsed || [],
// Map toolExecutions from EnhancedGenerateResult shape to TextGenerationResult shape
// Preserve original timing/status fields when present, fall back to safe defaults
toolExecutions: result.toolExecutions?.map((te) => {
const t = te;
return {
// Spread original fields first so normalized fields take precedence
...te,
toolName: te.name,
executionTime: typeof t.executionTime === "number"
? t.executionTime
: typeof t.duration === "number"
? t.duration
: 0,
success: typeof t.success === "boolean"
? t.success
: t.status === undefined || t.status === "success",
};
}),
enhancedWithTools: !!(result.toolsUsed && result.toolsUsed.length > 0),
analytics: result.analytics,
evaluation: result.evaluation,
audio: r