@vfarcic/dot-ai
Version:
AI-powered development productivity platform that enhances software development workflows through intelligent automation and AI-driven assistance.
796 lines (795 loc) • 43.7 kB
JavaScript
"use strict";
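/**
* Vercel AI Provider Implementation
*
* Implements AIProvider interface using Vercel AI SDK.
* Supports OpenAI, Google Gemini, Anthropic, xAI, Alibaba, Kimi (Moonshot),
* Amazon Bedrock, OpenRouter, custom OpenAI-compatible endpoints and
* GitHub Copilot through a unified interface.
*/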
Object.defineProperty(exports, "__esModule", { value: true });
exports.VercelProvider = void 0;
const ai_1 = require("ai");
const openai_1 = require("@ai-sdk/openai");
const openai_compatible_1 = require("@ai-sdk/openai-compatible");
const google_1 = require("@ai-sdk/google");
const anthropic_1 = require("@ai-sdk/anthropic");
const xai_1 = require("@ai-sdk/xai");
const alibaba_1 = require("@ai-sdk/alibaba");
const amazon_bedrock_1 = require("@ai-sdk/amazon-bedrock");
const ai_sdk_provider_1 = require("@openrouter/ai-sdk-provider");
const constants_1 = require("../constants");
const provider_debug_utils_1 = require("./provider-debug-utils");
const model_config_1 = require("../model-config");
const investigation_1 = require("../constants/investigation");
const ai_tracing_1 = require("../tracing/ai-tracing");
const ai_retry_config_1 = require("../ai-retry-config");
const copilot_token_exchanger_1 = require("./copilot-token-exchanger");
// Provider keys accepted by VercelProvider, taken from the keys of CURRENT_MODELS.
// validateConfiguration() rejects any provider type that is not in this list.
const SUPPORTED_PROVIDERS = Object.keys(model_config_1.CURRENT_MODELS);
class VercelProvider {
providerType;
model;
apiKey;
debugMode;
baseURL; // PRD #194: Custom endpoint URL for OpenAI-compatible APIs
customHeaders; // PRD #443: Custom HTTP headers
modelInstance; // Vercel AI SDK model instance - initialized by initializeModel()
constructor(config) {
this.apiKey = config.apiKey;
this.providerType = config.provider;
this.model = config.model || this.getDefaultModel();
this.debugMode = config.debugMode ?? process.env.DEBUG_DOT_AI === 'true';
this.baseURL = config.baseURL; // PRD #194: Store custom endpoint URL
this.customHeaders = config.customHeaders; // PRD #443: Store custom headers
this.validateConfiguration();
this.initializeModel();
}
validateConfiguration() {
// Copilot resolves its credential from the env chain at fetch time — no apiKey required.
if (!this.apiKey && this.providerType !== 'copilot') {
throw new Error(constants_1.AI_SERVICE_ERROR_TEMPLATES.API_KEY_REQUIRED(this.providerType));
}
if (!SUPPORTED_PROVIDERS.includes(this.providerType)) {
throw new Error(constants_1.AI_SERVICE_ERROR_TEMPLATES.UNSUPPORTED_PROVIDER(this.providerType, SUPPORTED_PROVIDERS));
}
}
initializeModel() {
try {
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Vercel AI SDK provider types vary
let provider;
// PRD #443: Helper to build merged headers (provider defaults + custom headers)
// Custom headers take precedence over provider defaults via spread order
const mergeHeaders = (providerDefaults) => {
if (!providerDefaults && !this.customHeaders)
return undefined;
return {
...providerDefaults,
...this.customHeaders,
};
};
// PRD #443: Helper for optional baseURL spread
const baseURLOpt = this.baseURL ? { baseURL: this.baseURL } : {};
switch (this.providerType) {
case 'openai':
provider = (0, openai_1.createOpenAI)({
apiKey: this.apiKey,
...baseURLOpt,
...(mergeHeaders() && { headers: mergeHeaders() }),
});
break;
case 'google':
case 'google_flash': // PRD #294: Gemini 3 Flash variant
provider = (0, google_1.createGoogleGenerativeAI)({
apiKey: this.apiKey,
...baseURLOpt,
...(mergeHeaders() && { headers: mergeHeaders() }),
});
break;
case 'anthropic':
case 'anthropic_opus':
case 'anthropic_haiku': {
// Detect Authorization header in custom headers (case-insensitive).
// Corporate proxies expect Authorization: Bearer auth, but apiKey sends x-api-key.
// When Authorization is present, extract the Bearer token and pass it as authToken
// so the SDK sends Authorization: Bearer instead of x-api-key.
const authHeaderKey = this.customHeaders
? Object.keys(this.customHeaders).find(key => key.toLowerCase() === 'authorization')
: undefined;
const authOpt = authHeaderKey
? {
authToken: this.customHeaders[authHeaderKey].replace(/^Bearer\s+/i, ''),
}
: { apiKey: this.apiKey };
// Strip Authorization from custom headers when using authToken (SDK generates it)
const filteredCustomHeaders = authHeaderKey && this.customHeaders
? Object.fromEntries(Object.entries(this.customHeaders).filter(([key]) => key.toLowerCase() !== 'authorization'))
: this.customHeaders;
provider = (0, anthropic_1.createAnthropic)({
...authOpt,
...baseURLOpt,
...(filteredCustomHeaders && { headers: filteredCustomHeaders }),
});
break;
}
case 'xai':
provider = (0, xai_1.createXai)({
apiKey: this.apiKey,
...baseURLOpt,
...(mergeHeaders() && { headers: mergeHeaders() }),
});
break;
case 'alibaba':
// PRD #382: Alibaba Qwen 3.5 Plus - uses @ai-sdk/alibaba dedicated SDK
// Note: @ai-sdk/alibaba does not accept baseURL (PRD #443 only covers Anthropic/OpenAI/Google/xAI)
provider = (0, alibaba_1.createAlibaba)({
apiKey: this.apiKey,
...(mergeHeaders() && { headers: mergeHeaders() }),
});
break;
case 'kimi':
// PRD #353: Moonshot AI Kimi K2.5 - uses @ai-sdk/openai-compatible for proper
// reasoning_content preservation in multi-turn tool calling
// Use global endpoint (api.moonshot.ai) - China endpoint (api.moonshot.cn) requires China-specific API keys
provider = (0, openai_compatible_1.createOpenAICompatible)({
name: 'kimi',
apiKey: this.apiKey,
baseURL: 'https://api.moonshot.ai/v1',
...(mergeHeaders() && { headers: mergeHeaders() }),
});
this.modelInstance = provider.chatModel(this.model);
return; // Early return - model instance already set
case 'amazon_bedrock':
// PRD #175: Amazon Bedrock provider
// AWS SDK automatically uses credential chain:
// 1. Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
// 2. ~/.aws/credentials file
// 3. IAM roles (EC2 instance profiles, ECS roles, EKS service accounts)
// Note: Custom headers not supported - AWS SDK handles auth via credential chain
provider = (0, amazon_bedrock_1.createAmazonBedrock)({
region: process.env.AWS_REGION || 'us-east-1',
});
break;
case 'openrouter':
// PRD #194: OpenRouter custom endpoint support
// Use dedicated OpenRouter provider for proper format conversion
provider = (0, ai_sdk_provider_1.createOpenRouter)({
apiKey: this.apiKey,
...baseURLOpt,
...(mergeHeaders() && { headers: mergeHeaders() }),
});
break;
case 'custom':
// PRD #194: Generic custom endpoint support for OpenAI-compatible APIs
// For non-OpenRouter custom endpoints (Ollama, vLLM, LiteLLM, etc.)
if (!this.baseURL) {
throw new Error('Custom endpoint requires CUSTOM_LLM_BASE_URL to be set');
}
provider = (0, openai_1.createOpenAI)({
apiKey: this.apiKey,
baseURL: this.baseURL,
...(mergeHeaders() && { headers: mergeHeaders() }),
});
// Use .chat() explicitly for custom endpoints to use /chat/completions instead of /responses
this.modelInstance = provider.chat(this.model);
return; // Early return - model instance already set
case 'copilot': {
// PRD #587: GitHub Copilot provider
// Uses the raw GitHub token (gho_*, github_pat_*, ghu_*) directly as a
// Bearer credential against api.githubcopilot.com — no token-exchange step.
//
// Routing (mirrors Hermes Agent):
// - Claude model IDs (claude-*) → createAnthropic at githubcopilot.com
// because the Copilot OpenAI-compat non-streaming response for Claude omits
// the "index" field that @ai-sdk/openai requires, causing parse failures.
// - All other models → createOpenAI at githubcopilot.com (OpenAI-compat path)
//
// Model IDs must use dot notation matching the Copilot catalog
// (e.g. claude-sonnet-4.6, NOT claude-sonnet-4-6).
// On 401: re-resolve credentials from the env chain and retry once.
const resolver = (0, copilot_token_exchanger_1.makeCopilotCredentialResolver)(this.apiKey);
// These headers were captured from VS Code Copilot Chat network traffic.
// They are required by api.githubcopilot.com to accept the request —
// the endpoint validates the Integration-Id and Editor-Version before routing.
// Future maintainers: if GitHub changes the required headers, update here.
const copilotHeaders = {
'Copilot-Integration-Id': 'vscode-chat',
'Editor-Version': 'vscode/1.104.1',
'Openai-Intent': 'conversation-edits',
'x-initiator': 'user',
};
const COPILOT_FETCH_TIMEOUT_MS = 30000; // 30s — matches git-utils.ts fetchWithTimeout
const copilotFetch = async (url, init) => {
const token = resolver.resolve();
const headers = new Headers(init?.headers);
headers.set('Authorization', `Bearer ${token}`);
for (const [k, v] of Object.entries(copilotHeaders)) {
headers.set(k, v);
}
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), COPILOT_FETCH_TIMEOUT_MS);
let response;
try {
response = await fetch(url, { ...init, headers, signal: controller.signal });
}
finally {
clearTimeout(timeoutId);
}
if (response.status === 401) {
// Drain body to allow connection reuse before retrying
await response.text().catch(() => { });
// Re-resolve from env chain (credentials may have been refreshed externally)
const freshToken = resolver.resolve();
// Build fresh headers for retry — do not mutate the first-attempt object
const retryHeaders = new Headers(init?.headers);
retryHeaders.set('Authorization', `Bearer ${freshToken}`);
for (const [k, v] of Object.entries(copilotHeaders)) {
retryHeaders.set(k, v);
}
const retryController = new AbortController();
const retryTimeoutId = setTimeout(() => retryController.abort(), COPILOT_FETCH_TIMEOUT_MS);
try {
return await fetch(url, { ...init, headers: retryHeaders, signal: retryController.signal });
}
finally {
clearTimeout(retryTimeoutId);
}
}
return response;
};
const isClaudeModel = this.model.startsWith('claude-');
if (isClaudeModel) {
// Use Anthropic SDK routed through the Copilot endpoint.
// baseURL must include /v1 — the SDK appends /messages, so the
// final URL is https://api.githubcopilot.com/v1/messages.
const anthropicProvider = (0, anthropic_1.createAnthropic)({
baseURL: 'https://api.githubcopilot.com/v1',
apiKey: 'unused', // required by SDK but overridden by copilotFetch
fetch: copilotFetch,
});
this.modelInstance = anthropicProvider(this.model);
}
else {
provider = (0, openai_1.createOpenAI)({
apiKey: 'unused',
baseURL: 'https://api.githubcopilot.com',
fetch: copilotFetch,
});
this.modelInstance = provider.chat(this.model);
}
return; // Early return - model instance already set
}
default:
throw new Error(`Cannot initialize model for provider: ${this.providerType}`);
}
this.modelInstance = provider(this.model);
}
catch (error) {
throw new Error(`Failed to initialize ${this.providerType} model: ${error}`, { cause: error });
}
}
getProviderType() {
return this.providerType;
}
getDefaultModel() {
return model_config_1.CURRENT_MODELS[this.providerType];
}
getModelName() {
return this.model;
}
isInitialized() {
return this.modelInstance !== undefined;
}
logDebugIfEnabled(operation, prompt, response) {
if (!this.debugMode)
return null;
const debugId = (0, provider_debug_utils_1.generateDebugId)(operation);
(0, provider_debug_utils_1.debugLogInteraction)(debugId, prompt, response, operation, this.getProviderType(), this.model, this.debugMode);
// Return the actual debug file names created
return {
promptFile: `${debugId}_prompt.md`,
responseFile: `${debugId}_response.md`,
};
}
async sendMessage(message, operation = 'generic', evaluationContext) {
if (!this.isInitialized()) {
throw new Error(`${this.providerType} provider not initialized`);
}
return await (0, ai_tracing_1.withAITracing)({
provider: this.providerType,
model: this.model,
operation: 'chat',
}, async () => {
const startTime = Date.now();
try {
// Use Vercel AI SDK generateText
// Note: maxOutputTokens not specified - provider will use model's natural maximum
const result = await (0, ai_1.generateText)({
model: this.modelInstance,
prompt: message,
// Configurable retry budget; chat defaults to the SDK's value.
maxRetries: (0, ai_retry_config_1.getMaxRetries)('chat'),
});
const response = {
content: result.text,
usage: {
input_tokens: (result.totalUsage || result.usage).inputTokens || 0,
output_tokens: (result.totalUsage || result.usage).outputTokens || 0,
},
};
const durationMs = Date.now() - startTime;
// Debug log the interaction if enabled
if (this.debugMode) {
const debugId = (0, provider_debug_utils_1.generateDebugId)(operation);
(0, provider_debug_utils_1.debugLogInteraction)(debugId, message, response, operation, this.getProviderType(), this.model, this.debugMode);
// PRD #154: Always use new evaluation dataset system
const evaluationMetrics = {
// Core execution data
operation,
sdk: this.getProviderType(),
inputTokens: response.usage.input_tokens,
outputTokens: response.usage.output_tokens,
durationMs,
// Required fields
iterationCount: 1,
toolCallCount: 0,
status: 'completed',
completionReason: 'stop',
modelVersion: this.model,
// Required evaluation context - NO DEFAULTS, must be provided
test_scenario: operation,
ai_response_summary: response.content,
user_intent: evaluationContext?.user_intent || '',
interaction_id: evaluationContext?.interaction_id || '',
// Optional performance data
...(response.usage.cache_creation_input_tokens && {
cacheCreationTokens: response.usage.cache_creation_input_tokens,
}),
...(response.usage.cache_read_input_tokens && {
cacheReadTokens: response.usage.cache_read_input_tokens,
}),
};
// Calculate cache hit rate if applicable
if (response.usage.cache_read_input_tokens &&
response.usage.input_tokens > 0) {
evaluationMetrics.cacheHitRate = Math.round((response.usage.cache_read_input_tokens /
response.usage.input_tokens) *
100);
}
(0, provider_debug_utils_1.logEvaluationDataset)(evaluationMetrics, this.debugMode);
}
return response;
}
catch (error) {
// Log the prompt that caused the error for debugging
if (this.debugMode) {
const debugId = (0, provider_debug_utils_1.generateDebugId)(operation);
(0, provider_debug_utils_1.debugLogPromptOnly)(debugId, message, operation, this.getProviderType(), this.model, this.debugMode);
}
// Generate dataset for failed AI interaction
if (this.debugMode && evaluationContext) {
const failureMetrics = {
operation,
user_intent: evaluationContext.user_intent || '',
ai_response_summary: `Error: ${error instanceof Error ? error.message : String(error)}`,
durationMs: Date.now() - startTime,
inputTokens: 0,
outputTokens: 0,
iterationCount: 0,
toolCallCount: 0,
status: 'failed',
completionReason: 'error',
sdk: this.getProviderType(),
modelVersion: this.model,
test_scenario: operation,
interaction_id: evaluationContext.interaction_id || (0, provider_debug_utils_1.generateDebugId)(operation),
failure_analysis: {
failure_type: 'error',
failure_reason: `${this.providerType} API error: ${error instanceof Error ? error.message : String(error)}`,
time_to_failure: Date.now() - startTime,
},
};
(0, provider_debug_utils_1.logEvaluationDataset)(failureMetrics, this.debugMode);
}
throw new Error(`${this.providerType} API error: ${error}`, {
cause: error,
});
}
}, (response) => ({
inputTokens: response.usage.input_tokens,
outputTokens: response.usage.output_tokens,
cacheReadTokens: response.usage.cache_read_input_tokens,
cacheCreationTokens: response.usage.cache_creation_input_tokens,
}));
}
/**
* Agentic tool loop using Vercel AI SDK
*
* Implements multi-turn tool calling using generateText with maxSteps.
* The Vercel AI SDK handles the conversation loop automatically.
*
* Provider-specific caching:
* - Anthropic: Manual cache control via providerOptions
* - OpenAI: Automatic caching (no code changes needed)
* - Google: Check Gemini caching capabilities
*
* See PRD #143 Milestone 2.5 for Vercel provider implementation details.
*/
async toolLoop(config) {
if (!this.isInitialized()) {
throw new Error(`${this.providerType} provider not initialized`);
}
return await (0, ai_tracing_1.withAITracing)({
provider: this.providerType,
model: this.model,
operation: 'tool_loop',
}, async () => {
const startTime = Date.now();
const maxIterations = config.maxIterations || 20;
const operation = config.operation || 'tool-loop';
// Convert AITool[] to Vercel AI SDK tool format
const tools = {};
// PRD #320: Capture tool calls during execution (not reconstruction from steps)
// This ensures toolCallsExecuted has actual data for visualization
const toolCallsExecuted = [];
for (let i = 0; i < config.tools.length; i++) {
const aiTool = config.tools[i];
const isLastTool = i === config.tools.length - 1;
const toolDef = (0, ai_1.tool)({
description: aiTool.description,
inputSchema: (0, ai_1.jsonSchema)(aiTool.inputSchema),
execute: async (input) => {
// Execute and capture result
const output = await config.toolExecutor(aiTool.name, input);
// Capture for toolCallsExecuted array
toolCallsExecuted.push({
tool: aiTool.name,
input,
output,
});
return output;
},
});
// Add cache control ONLY to last tool for Anthropic (max 4 cache breakpoints)
// This caches the system prompt + all tools together
if ((this.providerType === 'anthropic' ||
this.providerType === 'anthropic_opus' ||
this.providerType === 'anthropic_haiku') &&
isLastTool) {
toolDef.providerOptions = {
anthropic: {
cacheControl: { type: 'ephemeral' },
},
};
}
// TODO: Check if Google Gemini supports caching in future SDK versions
// Google Gemini may have caching capabilities - research providerOptions.google syntax
// if (this.providerType === 'google' && isLastTool) {
// (toolDef as any).providerOptions = {
// google: { /* caching config if available */ }
// };
// }
tools[aiTool.name] = toolDef;
}
// Build messages array with system prompt caching for Anthropic
// Anthropic caching requires system messages in messages array with providerOptions
const messages = [];
let systemParam;
if (this.providerType === 'anthropic' ||
this.providerType === 'anthropic_opus' ||
this.providerType === 'anthropic_haiku') {
// For Anthropic: Put system in messages array with cacheControl
messages.push({
role: 'system',
content: config.systemPrompt,
providerOptions: {
anthropic: {
cacheControl: { type: 'ephemeral' },
},
},
});
// Don't use system parameter for Anthropic when caching
systemParam = undefined;
}
else {
// For OpenAI/Google: Use system parameter (string)
systemParam = config.systemPrompt;
}
// Add user message
messages.push({
role: 'user',
content: config.userMessage,
});
// TODO: Check if Google Gemini supports system prompt caching in future SDK versions
// if (this.providerType === 'google') {
// messages.unshift({
// role: 'system',
// content: config.systemPrompt,
// providerOptions: {
// google: { /* caching config if available */ }
// }
// });
// systemParam = undefined;
// }
try {
// Use Vercel AI SDK's generateText with stopWhen for automatic loop
// Default is stepCountIs(1) - we need to increase for multi-step investigation
// Note: maxOutputTokens not specified - provider will use model's natural maximum
const generateConfig = {
model: this.modelInstance,
messages,
tools,
stopWhen: (0, ai_1.stepCountIs)(maxIterations),
// Configurable retry budget; tool-loop steps use the SDK default.
maxRetries: (0, ai_retry_config_1.getMaxRetries)('tool_loop'),
};
// Add system parameter for non-Anthropic providers
if (systemParam) {
generateConfig.system = systemParam;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Vercel AI SDK types are complex, use any for compatibility
const result = await (0, ai_1.generateText)(generateConfig);
// Log raw response immediately after generation (before any processing)
let debugFiles = null;
if (this.debugMode) {
// Build the full conversation context like Anthropic provider does
let finalPrompt = `System: ${config.systemPrompt}\n\n`;
// Always include the original user intent first
finalPrompt += `user: ${config.userMessage}\n\n`;
// Then add the conversation history if available
if (result.response?.messages) {
finalPrompt += result.response.messages
.map(msg => {
if (typeof msg.content === 'string') {
return `${msg.role}: ${msg.content}`;
}
else if (Array.isArray(msg.content)) {
const contentParts = msg.content
.map(part => {
const typedPart = part;
if (typedPart.type === 'text') {
return typedPart.text;
}
else if (typedPart.type === 'tool-call') {
return `[TOOL_USE: ${typedPart.toolName}]`;
}
else if (typedPart.type === 'tool-result') {
const resultData = typedPart.output ||
typedPart.result ||
typedPart.content;
if (typeof resultData === 'string') {
return `[TOOL_RESULT: ${typedPart.toolName}]\n${resultData}`;
}
else if (resultData) {
return `[TOOL_RESULT: ${typedPart.toolName}]\n${JSON.stringify(resultData, null, 2)}`;
}
return `[TOOL_RESULT: ${typedPart.toolName}]`;
}
return `[${typedPart.type}]`;
})
.join(' ');
return `${msg.role}: ${contentParts}`;
}
return `${msg.role}: [complex_content]`;
})
.join('\n\n');
}
// Create raw response content that includes ALL data from result
let rawResponseContent = `# RAW RESPONSE DATA\n\n`;
rawResponseContent += `**result.text**: ${result.text || '[EMPTY]'}\n\n`;
if (result.steps && result.steps.length > 0) {
rawResponseContent += `**Steps (${result.steps.length})**:\n`;
result.steps.forEach((step, i) => {
rawResponseContent += `\nStep ${i + 1}:\n`;
rawResponseContent += `- text: ${step.text || '[EMPTY]'}\n`;
if (step.toolCalls) {
rawResponseContent += `- toolCalls: ${step.toolCalls.length}\n`;
}
if (step.toolResults) {
rawResponseContent += `- toolResults: ${step.toolResults.length}\n`;
}
});
rawResponseContent += '\n';
}
// Add the last step's text for easy access
let lastStepText = '';
if (result.steps && result.steps.length > 0) {
for (let i = result.steps.length - 1; i >= 0; i--) {
if (result.steps[i].text && result.steps[i].text.trim()) {
lastStepText = result.steps[i].text;
break;
}
}
}
rawResponseContent += `**Last step with text**: ${lastStepText || '[NONE]'}\n\n`;
const usage = result.totalUsage || result.usage;
const rawAiResponse = {
content: rawResponseContent,
usage: {
input_tokens: usage.inputTokens || 0,
output_tokens: usage.outputTokens || 0,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
},
};
debugFiles = this.logDebugIfEnabled(`${operation}-raw`, finalPrompt, rawAiResponse);
}
// PRD #320: toolCallsExecuted is now captured during execution (see tool wrapper above)
// This replaces the old reconstruction from result.steps which lost input/output data
// Normalize token metrics across providers
// NOTE: Vercel AI SDK had token reporting bugs that were fixed in PR #8945 (merged Sept 26, 2025)
// - GitHub Issue #8349: cache tokens only reflected last step, not summed across all steps
// - GitHub Issue #8795: Token reporting issues with Anthropic provider (streaming)
// Our version (5.0.60, released Oct 2, 2025) includes these fixes.
// However, testing still shows ~70% fewer tokens reported vs Anthropic native SDK.
// Root cause: We were using result.usage (final step only) instead of result.totalUsage (sum of all steps)!
const usage = result.totalUsage || result.usage;
const typedUsage = usage;
let cacheReadTokens = 0;
let cacheCreationTokens = 0;
// Anthropic via Vercel uses cachedInputTokens (confirmed in AI SDK 5+)
if (typedUsage.cachedInputTokens) {
cacheReadTokens = typedUsage.cachedInputTokens;
}
// OpenAI uses cached_tokens or cachedTokens (automatic caching, no config needed)
if (typedUsage.cachedTokens || typedUsage.cached_tokens) {
cacheReadTokens =
typedUsage.cachedTokens || typedUsage.cached_tokens || 0;
}
// Anthropic native SDK uses separate cache_creation and cache_read fields
if (typedUsage.cache_creation_input_tokens) {
cacheCreationTokens = typedUsage.cache_creation_input_tokens;
}
if (typedUsage.cache_read_input_tokens) {
cacheReadTokens = typedUsage.cache_read_input_tokens;
}
// TODO: Check if Google Gemini reports cache metrics in future SDK versions
// Google Gemini may return cache-related metrics - check usage object structure
// Possible fields: cachedTokens, cacheHits, or provider-specific naming
// Add normalization logic here when Gemini caching is confirmed
// Extract final text from the last step (result.text might be empty if last step had tool calls)
let finalText = result.text;
if (!finalText || finalText.trim().length === 0) {
// If result.text is empty, find the last text response from steps
for (let i = (result.steps || []).length - 1; i >= 0; i--) {
const step = result.steps[i];
if (step.text && step.text.trim().length > 0) {
finalText = step.text;
break;
}
}
}
// Check if we hit max iterations without a proper summary
// If so, make one final wrap-up call WITHOUT tools to force summary generation
const stepsUsed = result.steps?.length || 0;
const hasProperSummary = finalText && finalText.includes('{') && finalText.includes('}');
if (stepsUsed >= maxIterations && !hasProperSummary) {
try {
// Build wrap-up messages with full conversation history
const wrapUpMessages = [];
// Add system message for Anthropic providers
if (this.providerType === 'anthropic' ||
this.providerType === 'anthropic_opus' ||
this.providerType === 'anthropic_haiku') {
wrapUpMessages.push({
role: 'system',
content: config.systemPrompt,
});
}
// Add original user message
wrapUpMessages.push({
role: 'user',
content: config.userMessage,
});
// Add conversation history from steps
for (const step of result.steps || []) {
if (step.text) {
wrapUpMessages.push({
role: 'assistant',
content: step.text,
});
}
// Add tool results as user messages
for (const toolResult of step.toolResults || []) {
const typedToolResult = toolResult;
wrapUpMessages.push({
role: 'user',
content: `Tool result from ${typedToolResult.toolName}: ${JSON.stringify(typedToolResult.output || toolResult)}`,
});
}
}
// Add wrap-up instruction
wrapUpMessages.push({
role: 'user',
content: investigation_1.INVESTIGATION_MESSAGES.WRAP_UP,
});
// Make final call WITHOUT tools
const wrapUpConfig = {
model: this.modelInstance,
messages: wrapUpMessages,
// NO tools - forces text response
// Configurable retry budget; wrap-up fails fast by default.
maxRetries: (0, ai_retry_config_1.getMaxRetries)('wrap_up'),
};
// Add system parameter for non-Anthropic providers
if (this.providerType !== 'anthropic' &&
this.providerType !== 'anthropic_opus' &&
this.providerType !== 'anthropic_haiku') {
wrapUpConfig.system = config.systemPrompt;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Vercel AI SDK types are complex, use any for compatibility
const wrapUpResult = await (0, ai_1.generateText)(wrapUpConfig);
finalText = wrapUpResult.text || finalText;
}
catch (wrapUpError) {
// If wrap-up fails, continue with whatever we have
console.warn('Wrap-up call failed, using existing response:', wrapUpError);
}
}
// Log processed summary response (keep existing functionality)
if (this.debugMode && debugFiles === null) {
// Only log summary if we haven't already logged raw response
const finalPrompt = `System: ${config.systemPrompt}\n\nuser: ${config.userMessage}`;
const aiResponse = {
content: finalText || '',
usage: {
input_tokens: usage.inputTokens || 0,
output_tokens: usage.outputTokens || 0,
cache_creation_input_tokens: cacheCreationTokens,
cache_read_input_tokens: cacheReadTokens,
},
};
debugFiles = this.logDebugIfEnabled(`${operation}-summary`, finalPrompt, aiResponse);
}
return (0, provider_debug_utils_1.createAndLogAgenticResult)({
finalMessage: finalText || '',
iterations: result.steps?.length || 1,
toolCallsExecuted,
totalTokens: {
input: usage.inputTokens || 0,
output: usage.outputTokens || 0,
cacheCreation: cacheCreationTokens,
cacheRead: cacheReadTokens,
},
status: 'success',
completionReason: 'investigation_complete',
modelVersion: this.model,
operation: `${operation}-summary`,
sdk: this.getProviderType(),
startTime,
debugMode: this.debugMode,
debugFiles,
evaluationContext: config.evaluationContext,
interaction_id: config.interaction_id,
});
}
catch (error) {
// Return error result with extended metrics
return (0, provider_debug_utils_1.createAndLogAgenticResult)({
finalMessage: `Error during investigation: ${error instanceof Error ? error.message : String(error)}`,
iterations: 0,
toolCallsExecuted: [],
totalTokens: {
input: 0,
output: 0,
cacheCreation: 0,
cacheRead: 0,
},
status: 'failed',
completionReason: 'error',
modelVersion: this.model,
operation: `${operation}-error`,
sdk: this.getProviderType(),
startTime,
debugMode: this.debugMode,
evaluationContext: config.evaluationContext,
interaction_id: config.interaction_id,
});
}
}, (result) => ({
inputTokens: result.totalTokens.input,
outputTokens: result.totalTokens.output,
cacheReadTokens: result.totalTokens.cacheRead,
cacheCreationTokens: result.totalTokens.cacheCreation,
}));
}
}
exports.VercelProvider = VercelProvider;