@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications...
598 lines • 29.4 kB
JavaScript
/**
* Generation Handler Module
*
* Handles text generation execution, result formatting, and tool information extraction.
* Extracted from BaseProvider to follow Single Responsibility Principle.
*
* Responsibilities:
* - Generation execution with AI SDK
* - Tool information extraction
* - Result formatting and enhancement
* - Response analysis and logging
*
* @module core/modules/GenerationHandler
*/
import { SpanKind, SpanStatusCode } from "@opentelemetry/api";
import { generateText, NoObjectGeneratedError, Output, stepCountIs } from "ai";
import { getModelId } from "../../providers/providerTypeUtils.js";
import { tracers } from "../../telemetry/tracers.js";
import { logger } from "../../utils/logger.js";
import { emitToolEndFromStepFinish } from "../../utils/toolEndEmitter.js";
import { calculateCost } from "../../utils/pricing.js";
import { withProviderRetry } from "../../utils/providerRetry.js";
import { calculateCacheSavingsPercent, extractCacheCreationTokens, extractCacheReadTokens, extractTokenUsage, } from "../../utils/tokenUtils.js";
import { DEFAULT_MAX_STEPS } from "../constants.js";
// Tracer for generation spans, taken from the shared telemetry tracer registry.
// executeGeneration() opens its "neurolink.executeGeneration" span on it.
const genTracer = tracers.generation;
/**
 * Build a short textual preview of a value for debug logging.
 *
 * Strings are used verbatim; every other value goes through JSON.stringify.
 * The preview is capped at 200 characters.
 *
 * @param {*} v - Value to preview.
 * @returns {string} "" for undefined (or for values JSON.stringify maps to
 *   undefined), "[unserializable]" when serialization throws (circular
 *   references, BigInt, ...), otherwise the first 200 characters of the text.
 */
function safePreview(v) {
    if (v === undefined) {
        return "";
    }
    let serialized;
    try {
        serialized = typeof v === "string" ? v : JSON.stringify(v);
    }
    catch {
        return "[unserializable]";
    }
    // JSON.stringify yields undefined for functions/symbols; treat that as empty
    const previewText = serialized ?? "";
    return previewText.slice(0, 200);
}
/**
 * Report whether the caller asked for structured (JSON) output.
 * A schema alone is sufficient; otherwise output.format must be "json" or "structured".
 */
function requestsStructuredOutput(options) {
    return (!!options.schema ||
        options.output?.format === "json" ||
        options.output?.format === "structured");
}
/**
 * Remove a leading ``` / ```json fence and a trailing ``` fence from model text, then trim.
 */
function stripJsonCodeFences(text) {
    return text
        .replace(/^```(?:json)?\s*\n?/i, "")
        .replace(/\n?```\s*$/i, "")
        .trim();
}
/**
 * Turn any thrown value into a log-friendly message string.
 */
function describeError(error) {
    return error instanceof Error ? error.message : String(error);
}
/**
 * Copy token usage, computed cost and finish reason from a generateText result onto a span.
 */
function recordResultOnSpan(span, result, providerName, modelName) {
    if (result.usage) {
        const inputTokens = result.usage.inputTokens || 0;
        const outputTokens = result.usage.outputTokens || 0;
        span.setAttribute("gen_ai.usage.input_tokens", inputTokens);
        span.setAttribute("gen_ai.usage.output_tokens", outputTokens);
        // Cost on span so users can query "what did this trace cost?"
        const cost = calculateCost(providerName, modelName, {
            input: inputTokens,
            output: outputTokens,
            total: inputTokens + outputTokens,
        });
        span.setAttribute("neurolink.cost", cost ?? 0);
    }
    if (result.finishReason) {
        span.setAttribute("gen_ai.response.finish_reason", result.finishReason);
    }
}
/**
 * GenerationHandler class - Handles text generation operations for AI providers.
 *
 * Wraps the AI SDK `generateText` call (with tracing, retry and a structured-output
 * fallback) and converts the raw result into the enhanced result shape.
 */
export class GenerationHandler {
    providerName;
    modelName;
    supportsToolsFn;
    getTelemetryConfigFn;
    handleToolStorageFn;
    getEmitterFn;
    /**
     * @param {string} providerName - Provider identifier (e.g. "anthropic", "vertex", "google-ai").
     * @param {string} modelName - Model name used for logging, cost and provider detection.
     * @param {Function} supportsToolsFn - Returns whether the provider supports tools.
     * @param {Function} getTelemetryConfigFn - Called as (options, "generate") to build experimental_telemetry.
     * @param {Function} handleToolStorageFn - Called as (toolCalls, toolResults, options, date); must return a promise.
     * @param {Function} [getEmitterFn] - Optional; returns the emitter used for tool:end events.
     */
    constructor(providerName, modelName, supportsToolsFn, getTelemetryConfigFn, handleToolStorageFn, getEmitterFn) {
        this.providerName = providerName;
        this.modelName = modelName;
        this.supportsToolsFn = supportsToolsFn;
        this.getTelemetryConfigFn = getTelemetryConfigFn;
        this.handleToolStorageFn = handleToolStorageFn;
        this.getEmitterFn = getEmitterFn;
    }
    /**
     * Helper method to call generateText with optional structured output.
     *
     * @param model - Language model instance passed straight to generateText.
     * @param messages - Conversation messages.
     * @param tools - Tool map keyed by tool name; may be undefined/empty.
     * @param options - Generation options (schema, output, maxSteps, toolChoice, thinkingConfig, ...).
     * @param shouldUseTools - Whether tools may be sent at all.
     * @param includeStructuredOutput - When false, never attach experimental_output (fallback path).
     * @returns The promise returned by generateText.
     * @private
     */
    async callGenerateText(model, messages, tools, options, shouldUseTools, includeStructuredOutput) {
        // Check if this is a Google provider (for provider-specific options)
        const isGoogleProvider = this.providerName === "google-ai" || this.providerName === "vertex";
        // Check if this is an Anthropic provider (includes Vertex+Claude)
        const isAnthropicProvider = this.providerName === "anthropic" ||
            this.providerName === "bedrock" ||
            (this.providerName === "vertex" && this.modelName?.startsWith("claude-"));
        // Shallow copy so the caller's tool map is never mutated. Spreading also
        // tolerates a missing map, matching executeGeneration's `tools || {}`.
        const toolsWithCache = { ...tools };
        const toolNames = Object.keys(toolsWithCache);
        const sendTools = shouldUseTools && toolNames.length > 0;
        // Gemini 2.5 and earlier cannot use tools + structured JSON output simultaneously.
        // When both are requested on a Google provider, disable structured output (tools take priority).
        const wantsStructuredOutput = includeStructuredOutput && requestsStructuredOutput(options);
        const useStructuredOutput = wantsStructuredOutput && !(isGoogleProvider && sendTools);
        // Annotate the last tool with cache_control so the full tool-definition
        // block becomes a cache breakpoint for Anthropic-family providers.
        // Note: The AI SDK Tool type doesn't yet include providerOptions; the
        // Anthropic adapter reads this at runtime.
        if (isAnthropicProvider && sendTools) {
            const lastToolName = toolNames[toolNames.length - 1];
            const lastTool = toolsWithCache[lastToolName];
            if (lastToolName && lastTool) {
                toolsWithCache[lastToolName] = {
                    ...lastTool,
                    providerOptions: {
                        ...(lastTool.providerOptions ?? {}),
                        anthropic: { cacheControl: { type: "ephemeral" } },
                    },
                };
            }
        }
        const prepareStep = options.prepareStep;
        const maxSteps = options.maxSteps ?? DEFAULT_MAX_STEPS;
        return await generateText({
            model,
            messages,
            ...(sendTools && { tools: toolsWithCache }),
            stopWhen: stepCountIs(maxSteps),
            ...(shouldUseTools &&
                options.toolChoice && { toolChoice: options.toolChoice }),
            ...(prepareStep && {
                experimental_prepareStep: ((stepOptions) => prepareStep({
                    ...stepOptions,
                    maxSteps,
                })),
            }),
            temperature: options.temperature,
            maxOutputTokens: options.maxTokens,
            maxRetries: 0, // NL11: Disable AI SDK's invisible internal retries; we handle retries with OTel instrumentation
            abortSignal: options.abortSignal,
            ...(useStructuredOutput &&
                options.schema && {
                experimental_output: Output.object({ schema: options.schema }),
            }),
            // Add thinking configuration for extended reasoning
            // Gemini 3 models use providerOptions.google.thinkingConfig with thinkingLevel
            // Gemini 2.5 models use thinkingBudget
            // Anthropic models use experimental_thinking with budgetTokens
            ...(options.thinkingConfig?.enabled && {
                // For Anthropic: experimental_thinking with budgetTokens
                ...(isAnthropicProvider &&
                    options.thinkingConfig.budgetTokens &&
                    !options.thinkingConfig.thinkingLevel && {
                    experimental_thinking: {
                        type: "enabled",
                        budgetTokens: options.thinkingConfig.budgetTokens,
                    },
                }),
                // For Google Gemini 3: providerOptions with thinkingLevel
                // For Gemini 2.5: providerOptions with thinkingBudget
                ...(isGoogleProvider && {
                    providerOptions: {
                        google: {
                            thinkingConfig: {
                                ...(options.thinkingConfig.thinkingLevel && {
                                    thinkingLevel: options.thinkingConfig.thinkingLevel,
                                }),
                                ...(options.thinkingConfig.budgetTokens &&
                                    !options.thinkingConfig.thinkingLevel && {
                                    thinkingBudget: options.thinkingConfig.budgetTokens,
                                }),
                                includeThoughts: true,
                            },
                        },
                    },
                }),
            }),
            experimental_telemetry: this.getTelemetryConfigFn(options, "generate"),
            onStepFinish: ({ toolCalls, toolResults }) => {
                logger.info("Tool execution completed", { toolResults, toolCalls });
                // Emit tool:end events for Pipeline B (metrics aggregator).
                // This surfaces AI-SDK-driven tool completions as telemetry events
                // so that tool spans are created even when the SDK runs tools
                // internally (gaps G5 / S2).
                emitToolEndFromStepFinish(this.getEmitterFn?.(), toolResults);
                // Handle tool execution storage; best-effort, so a rejection is only logged
                this.handleToolStorageFn(toolCalls, toolResults, options, new Date()).catch((error) => {
                    logger.warn("[GenerationHandler] Failed to store tool executions", {
                        provider: this.providerName,
                        error: describeError(error),
                    });
                });
            },
        });
    }
    /**
     * Execute the generation with AI SDK inside a "neurolink.executeGeneration" span.
     *
     * If the first attempt throws NoObjectGeneratedError while structured output was
     * requested, the call is retried once without experimental_output so that
     * formatEnhancedResult can extract JSON from the plain text instead.
     *
     * @param model - Language model instance.
     * @param messages - Conversation messages.
     * @param tools - Tool map keyed by tool name; may be undefined.
     * @param options - Generation options.
     * @returns The generateText result (from the first attempt or the fallback).
     * @throws Re-throws any error from generateText; the span is marked ERROR first.
     */
    async executeGeneration(model, messages, tools, options) {
        return genTracer.startActiveSpan("neurolink.executeGeneration", { kind: SpanKind.INTERNAL }, async (span) => {
            const shouldUseTools = !options.disableTools && this.supportsToolsFn();
            const toolNames = Object.keys(tools || {});
            const toolCount = toolNames.length;
            const useStructuredOutput = requestsStructuredOutput(options);
            span.setAttribute("gen_ai.system", this.providerName || "unknown");
            span.setAttribute("neurolink.structured_output", useStructuredOutput);
            span.setAttribute("neurolink.tool_count", toolCount);
            span.setAttribute("neurolink.message_count", messages.length);
            span.setAttribute("gen_ai.request.model", getModelId(model, this.modelName || "unknown"));
            const requestId = options.requestId ||
                options.context?.requestId ||
                "unknown";
            logger.info("[GenerationHandler] Calling generateText", {
                requestId,
                model: getModelId(model),
                messageCount: messages.length,
                toolCount,
                maxSteps: options.maxSteps,
                temperature: options.temperature,
            });
            if (logger.shouldLog("debug")) {
                try {
                    logger.debug("[Observability] Full generateText parameters", {
                        requestId,
                        model: getModelId(model),
                        messageCount: messages.length,
                        messages: messages.map((msg, i) => ({
                            index: i,
                            role: msg.role,
                            contentLength: typeof msg.content === "string"
                                ? msg.content.length
                                : safePreview(msg.content).length,
                            contentPreview: typeof msg.content === "string"
                                ? msg.content.substring(0, 200)
                                : "[multimodal]",
                        })),
                        toolNames,
                        toolCount,
                        maxSteps: options.maxSteps,
                        temperature: options.temperature,
                        maxTokens: options.maxTokens,
                    });
                }
                catch {
                    // Ignore serialization errors in debug logging
                }
            }
            const genStartTime = Date.now();
            // Shared summary log for both the primary and the fallback attempt
            const logReturned = (message, result) => logger.info(message, {
                requestId,
                durationMs: Date.now() - genStartTime,
                finishReason: result.finishReason,
                steps: result.steps?.length || 1,
                toolCallsTotal: result.toolCalls?.length || 0,
                responseChars: result.text?.length || 0,
            });
            try {
                let result;
                try {
                    result = await withProviderRetry(() => this.callGenerateText(model, messages, tools, options, shouldUseTools, true), span, "generateText");
                    logReturned("[GenerationHandler] generateText returned", result);
                    if (logger.shouldLog("debug")) {
                        logger.debug("[Observability] LLM response metadata", {
                            requestId,
                            responseLength: result.text?.length || 0,
                            hasToolCalls: !!(result.toolCalls && result.toolCalls.length > 0),
                            toolCallCount: result.toolCalls?.length || 0,
                            toolNames: result.toolCalls?.map((tc) => tc.toolName),
                            finishReason: result.finishReason,
                            stepCount: result.steps?.length || 0,
                            steps: result.steps?.map((step, i) => ({
                                stepIndex: i,
                                stepType: step.stepType,
                                textLength: step.text?.length || 0,
                                toolCallCount: step.toolCalls?.length || 0,
                                toolNames: step.toolCalls?.map((tc) => tc.toolName),
                                toolResultCount: step.toolResults?.length || 0,
                                finishReason: step.finishReason,
                            })),
                            usage: result.usage,
                        });
                    }
                }
                catch (error) {
                    // Only NoObjectGeneratedError with structured output requested is recoverable;
                    // everything else goes to the outer handler unchanged.
                    if (!(error instanceof NoObjectGeneratedError && useStructuredOutput)) {
                        throw error;
                    }
                    span.setAttribute("neurolink.has_fallback", true);
                    // NLK-GAP-007: Record initial failure event before fallback retry
                    span.addEvent("retry.initial_failure", {
                        "error.message": error.message,
                        "retry.attempt": 1,
                        "retry.reason": "NoObjectGeneratedError_structured_output_fallback",
                    });
                    logger.debug("[GenerationHandler] NoObjectGeneratedError caught - falling back to manual JSON extraction", {
                        provider: this.providerName,
                        model: this.modelName,
                        error: error.message,
                    });
                    // Retry without experimental_output - the formatEnhancedResult method
                    // will extract JSON from the text response
                    result = await withProviderRetry(() => this.callGenerateText(model, messages, tools, options, shouldUseTools, false), span, "generateText(fallback)");
                    // NLK-GAP-007: Record recovery event after successful fallback
                    span.addEvent("retry.recovered", {
                        "retry.attempts": 2,
                        "retry.strategy": "structured_output_disabled",
                    });
                    span.setAttribute("retry.count", 1);
                    logReturned("[GenerationHandler] generateText returned (fallback)", result);
                }
                // Set token usage and completion attributes on span
                recordResultOnSpan(span, result, this.providerName, this.modelName);
                span.setStatus({ code: SpanStatusCode.OK });
                return result;
            }
            catch (error) {
                // Covers both a failed first attempt and a failed fallback retry, so the
                // span never ends without an error status when the call ultimately fails.
                span.setStatus({
                    code: SpanStatusCode.ERROR,
                    message: describeError(error),
                });
                throw error;
            }
            finally {
                span.end();
            }
        });
    }
    /**
     * Extract cache metrics from provider metadata (e.g. Anthropic's providerMetadata.anthropic).
     * The AI SDK's LanguageModelUsage only has inputTokens/outputTokens.
     * Cache metrics are surfaced via providerMetadata by provider-specific SDK adapters.
     *
     * @param generateResult - Result object returned by generateText.
     * @returns An object that contains cacheCreationTokens and/or cacheReadTokens only
     *   when the corresponding extractor returned a defined value; {} otherwise.
     */
    extractCacheMetricsFromProviderMetadata(generateResult) {
        // Anthropic surfaces cache metrics under providerMetadata.anthropic
        const anthropicMeta = generateResult.providerMetadata?.anthropic;
        if (!anthropicMeta) {
            return {};
        }
        const cacheCreationTokens = extractCacheCreationTokens(anthropicMeta);
        const cacheReadTokens = extractCacheReadTokens(anthropicMeta);
        return {
            ...(cacheCreationTokens !== undefined && { cacheCreationTokens }),
            ...(cacheReadTokens !== undefined && { cacheReadTokens }),
        };
    }
    /**
     * Log generation completion information at debug level.
     *
     * @param generateResult - Result object returned by generateText.
     */
    logGenerationComplete(generateResult) {
        if (!logger.shouldLog("debug")) {
            return;
        }
        // Only computed when the entry is actually going to be written
        const cacheMetrics = this.extractCacheMetricsFromProviderMetadata(generateResult);
        logger.debug(`generateText completed`, {
            provider: this.providerName,
            model: this.modelName,
            responseLength: generateResult.text?.length || 0,
            toolResultsCount: generateResult.toolResults?.length || 0,
            finishReason: generateResult.finishReason,
            usage: generateResult.usage,
            ...(cacheMetrics.cacheCreationTokens !== undefined && {
                cacheCreationTokens: cacheMetrics.cacheCreationTokens,
            }),
            ...(cacheMetrics.cacheReadTokens !== undefined && {
                cacheReadTokens: cacheMetrics.cacheReadTokens,
            }),
            timestamp: Date.now(),
        });
    }
    /**
     * Extract tool information from generation result.
     *
     * Tool names come from the top-level toolCalls and from every step's toolCalls.
     * One execution record is produced per step tool result; its input is taken from
     * the result itself or, failing that, from the matching tool call (by id, then name).
     *
     * @param generateResult - Result object returned by generateText.
     * @returns {{toolsUsed: string[], toolExecutions: Array<{name: string, input: *, output: *}>}}
     *   toolsUsed is de-duplicated in first-seen order.
     */
    extractToolInformation(generateResult) {
        const toolsUsed = [];
        const toolExecutions = [];
        // Extract tool names from tool calls
        if (generateResult.toolCalls && generateResult.toolCalls.length > 0) {
            toolsUsed.push(...generateResult.toolCalls.map((tc) => tc.toolName || tc.name || "unknown"));
        }
        // Extract from steps
        if (Array.isArray(generateResult.steps)) {
            // Arguments of every tool call seen so far, keyed by call id AND by tool name
            const toolCallArgsMap = new Map();
            for (const step of generateResult.steps) {
                // Collect tool calls and their arguments
                if (step?.toolCalls && Array.isArray(step.toolCalls)) {
                    for (const toolCall of step.toolCalls) {
                        const toolName = toolCall.toolName || toolCall.name || "unknown";
                        const toolId = toolCall.toolCallId || toolCall.id || toolName;
                        toolsUsed.push(toolName);
                        // `input` is the field newer AI SDK tool calls use for arguments;
                        // the older names are kept first for backward compatibility.
                        const callArgs = toolCall.args ||
                            toolCall.arguments ||
                            toolCall.parameters ||
                            toolCall.input ||
                            {};
                        toolCallArgsMap.set(toolId, callArgs);
                        toolCallArgsMap.set(toolName, callArgs);
                    }
                }
                // Process tool results
                if (step?.toolResults && Array.isArray(step.toolResults)) {
                    for (const toolResult of step.toolResults) {
                        const toolName = toolResult.toolName || "unknown";
                        const toolId = toolResult.toolCallId || toolResult.id;
                        const toolArgs = toolResult.args ??
                            toolResult.arguments ??
                            toolResult.parameters ??
                            toolResult.input ??
                            toolCallArgsMap.get(toolId || toolName) ??
                            {};
                        toolExecutions.push({
                            name: toolName,
                            input: toolArgs,
                            output: (toolResult.output ?? toolResult.result) ?? "success",
                        });
                    }
                }
            }
        }
        return { toolsUsed: [...new Set(toolsUsed)], toolExecutions };
    }
    /**
     * Format the enhanced result.
     *
     * @param generateResult - Result object returned by generateText.
     * @param tools - Tool map keyed by tool name; may be undefined.
     * @param {string[]} toolsUsed - Tool names, as produced by extractToolInformation.
     * @param {Array} toolExecutions - Execution records, as produced by extractToolInformation.
     * @param options - Generation options (schema / output.format select structured output).
     * @returns Object with content, usage, finishReason, provider, model, reasoning,
     *   reasoningTokens, toolCalls, toolResults, toolsUsed, toolExecutions and availableTools.
     */
    formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options) {
        let content;
        // Structured output check — schema alone is sufficient to activate
        if (requestsStructuredOutput(options)) {
            try {
                const experimentalOutput = generateResult.experimental_output;
                content = experimentalOutput !== undefined
                    ? JSON.stringify(experimentalOutput)
                    // Fall back to text parsing
                    : stripJsonCodeFences(generateResult.text || "");
            }
            catch (outputError) {
                // experimental_output is a getter that can throw NoObjectGeneratedError
                // Fall back to text parsing when structured output fails
                logger.debug("[GenerationHandler] experimental_output threw, falling back to text parsing", {
                    error: describeError(outputError),
                });
                content = stripJsonCodeFences(generateResult.text || "");
            }
        }
        else {
            content = generateResult.text;
        }
        // Extract usage with support for different formats and reasoning tokens
        // Note: The AI SDK bundles thinking tokens into promptTokens for Google models.
        // Separate reasoningTokens tracking will work when/if the AI SDK adds support.
        const usage = extractTokenUsage(generateResult.usage);
        // Merge cache metrics from providerMetadata if not already present in usage
        // The AI SDK's LanguageModelUsage doesn't include cache tokens; they come from
        // provider-specific metadata (e.g. Anthropic's providerMetadata.anthropic)
        if (usage.cacheCreationTokens === undefined ||
            usage.cacheReadTokens === undefined) {
            const cacheMetrics = this.extractCacheMetricsFromProviderMetadata(generateResult);
            if (usage.cacheCreationTokens === undefined &&
                cacheMetrics.cacheCreationTokens !== undefined) {
                usage.cacheCreationTokens = cacheMetrics.cacheCreationTokens;
            }
            if (usage.cacheReadTokens === undefined &&
                cacheMetrics.cacheReadTokens !== undefined) {
                usage.cacheReadTokens = cacheMetrics.cacheReadTokens;
            }
            // Recalculate cache savings if we added cache metrics
            if (usage.cacheReadTokens !== undefined) {
                const savingsPercent = calculateCacheSavingsPercent(usage.cacheReadTokens, usage.input);
                if (savingsPercent !== undefined) {
                    usage.cacheSavingsPercent = savingsPercent;
                }
            }
        }
        // Extract reasoning from AI SDK response (Anthropic thinking, Gemini thought, OpenAI o1)
        // Handle both string and array (AI SDK v5 returns string, v6 returns ReasoningOutput[])
        const rawReasoning = generateResult.reasoning;
        let reasoning;
        if (!rawReasoning) {
            reasoning = undefined;
        }
        else if (typeof rawReasoning === "string") {
            reasoning = rawReasoning;
        }
        else if (Array.isArray(rawReasoning)) {
            reasoning = rawReasoning
                .map((r) => typeof r === "string" ? r : (r.text ?? JSON.stringify(r)))
                .join("\n");
        }
        else {
            reasoning = String(rawReasoning);
        }
        const reasoningTokens = usage.reasoning ?? undefined;
        // Tolerate a missing tool map, as executeGeneration does
        const toolMap = tools ?? {};
        return {
            content,
            usage,
            finishReason: generateResult.finishReason,
            provider: this.providerName,
            model: this.modelName,
            reasoning,
            reasoningTokens,
            toolCalls: generateResult.toolCalls
                ? generateResult.toolCalls.map((tc) => ({
                    toolCallId: tc.toolCallId || "unknown",
                    toolName: tc.toolName || "unknown",
                    // `input` is read as well so calls that carry their arguments
                    // there are not reported with empty args
                    args: tc.args || tc.input || {},
                }))
                : [],
            toolResults: generateResult.toolResults ?? [],
            toolsUsed,
            toolExecutions,
            availableTools: Object.keys(toolMap).map((name) => {
                const tool = toolMap[name];
                return {
                    name,
                    description: tool.description || "No description available",
                    parameters: tool.inputSchema || {},
                    server: tool.serverId || "direct",
                };
            }),
        };
    }
    /**
     * Analyze AI response structure and log detailed debugging information.
     *
     * Emits two debug entries: a summary of the raw response and a per-tool-call analysis.
     *
     * @param result - Result object returned by generateText.
     */
    analyzeAIResponse(result) {
        logger.debug("NeuroLink Raw AI Response Analysis", {
            provider: this.providerName,
            model: this.modelName,
            responseTextLength: result.text?.length || 0,
            responsePreview: result.text?.substring(0, 500) ?? "",
            finishReason: result.finishReason,
            usage: result.usage,
        });
        // Tool calls analysis
        const toolCallsAnalysis = {
            hasToolCalls: !!result.toolCalls,
            toolCallsLength: result.toolCalls?.length || 0,
            toolCalls: result.toolCalls?.map((toolCall, index) => {
                // Arguments may live under `args` (older shape) or `input` (newer shape)
                const callArgs = toolCall.args || toolCall.input;
                return {
                    index: index + 1,
                    toolName: toolCall.toolName || toolCall.name || "unknown",
                    toolId: toolCall.toolCallId || toolCall.id || "none",
                    hasArgs: !!callArgs,
                    argsKeys: callArgs && typeof callArgs === "object"
                        ? Object.keys(callArgs)
                        : [],
                };
            }) || [],
        };
        logger.debug("Tool Calls Analysis", toolCallsAnalysis);
    }
}
//# sourceMappingURL=GenerationHandler.js.map