buroventures-harald-code-core

Harald Code Core - Core functionality for AI-powered coding assistant

1,072 lines (1,071 loc) 70.6 kB
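The file below defines OpenAIContentGenerator, an OpenAI-compatible content generator with API key rotation, retry handling, and Gemini-format request/response conversion. As a rough orientation before the source, here is a hedged usage sketch; the import path, model name, and the minimal config stub are illustrative assumptions based only on the constructor and generateContent signatures visible below, not the package's documented API (a real Config object from the package would normally be passed in).

// Hedged usage sketch (not part of the package): assumes the class is exported
// from the package entry point and that CEREBRAS_API_KEY or OPENAI_API_KEY is set.
import { OpenAIContentGenerator } from 'buroventures-harald-code-core';

// Minimal stand-in for the real Config object -- the generator only calls
// getContentGeneratorConfig() in the code shown below.
const config = {
  getContentGeneratorConfig: () => ({ timeout: 30000, maxRetries: 1 }),
};

// The model name and the omitted rotation manager are illustrative assumptions.
const generator = new OpenAIContentGenerator(
  process.env.CEREBRAS_API_KEY ?? process.env.OPENAI_API_KEY,
  'qwen2.5-coder',
  config,
  undefined,
);

const response = await generator.generateContent({
  contents: [{ role: 'user', parts: [{ text: 'Write a haiku about code.' }] }],
});
console.log(response.candidates?.[0]?.content?.parts?.[0]?.text);

The file source follows.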
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import { GenerateContentResponse, FinishReason } from '@google/genai';
import OpenAI from 'openai';
import { logApiResponse } from '../telemetry/loggers.js';
import { ApiResponseEvent } from '../telemetry/types.js';
import { openaiLogger } from '../utils/openaiLogger.js';
import { ApiKeyRotationManager } from './apiKeyRotationManager.js';

export class OpenAIContentGenerator {
  client;
  model;
  config;
  rotationManager;
  streamingToolCalls = new Map();

  constructor(apiKey, model, config, rotationManager) {
    this.model = model;
    this.config = config;
    this.rotationManager = rotationManager;
    const baseURL = process.env.CEREBRAS_BASE_URL ||
      process.env.OPENAI_BASE_URL ||
      'https://api.cerebras.ai/v1';
    // Configure timeout settings - using progressive timeouts
    const timeoutConfig = {
      // Base timeout for most requests (10 seconds - very fast failure for rate limits)
      timeout: 10000,
      // Maximum retries for failed requests (no retries at client level)
      maxRetries: 0,
      // HTTP client options
      httpAgent: undefined, // Let the client use default agent
    };
    // Allow config to override timeout settings
    const contentGeneratorConfig = this.config.getContentGeneratorConfig();
    if (contentGeneratorConfig?.timeout) {
      timeoutConfig.timeout = contentGeneratorConfig.timeout;
    }
    if (contentGeneratorConfig?.maxRetries !== undefined) {
      timeoutConfig.maxRetries = contentGeneratorConfig.maxRetries;
    }
    // Check if using OpenRouter and add required headers
    const isOpenRouter = baseURL.includes('openrouter.ai');
    const defaultHeaders = isOpenRouter
      ? {
          'HTTP-Referer': 'https://github.com/QwenLM/qwen-code.git',
          'X-Title': 'Qwen Code',
        }
      : undefined;
    this.client = this.createClient(apiKey, baseURL, timeoutConfig, defaultHeaders);
  }

  /**
   * Create OpenAI client with given configuration
   */
  createClient(apiKey, baseURL, timeoutConfig, defaultHeaders) {
    return new OpenAI({
      apiKey,
      baseURL,
      timeout: timeoutConfig.timeout,
      maxRetries: timeoutConfig.maxRetries,
      defaultHeaders,
    });
  }

  /**
   * Get current API key (from rotation manager or fallback)
   */
  getCurrentApiKey() {
    if (this.rotationManager) {
      const rotatedKey = this.rotationManager.getCurrentApiKey();
      if (rotatedKey) {
        return rotatedKey;
      }
    }
    // Fallback to environment variables if no rotation manager or no keys
    return process.env.CEREBRAS_API_KEY || process.env.OPENAI_API_KEY || '';
  }

  /**
   * Recreate client with current API key
   */
  recreateClientWithCurrentKey() {
    const currentApiKey = this.getCurrentApiKey();
    if (!currentApiKey) {
      throw new Error('No API key available for OpenAI client');
    }
    console.log(`🔄 Recreating OpenAI client with key: ...${currentApiKey.slice(-4)}`);
    const baseURL = process.env.CEREBRAS_BASE_URL ||
      process.env.OPENAI_BASE_URL ||
      'https://api.cerebras.ai/v1';
    // Configure timeout settings
    const timeoutConfig = {
      timeout: 120000,
      maxRetries: 3,
    };
    const contentGeneratorConfig = this.config.getContentGeneratorConfig();
    if (contentGeneratorConfig?.timeout) {
      timeoutConfig.timeout = contentGeneratorConfig.timeout;
    }
    if (contentGeneratorConfig?.maxRetries !== undefined) {
      timeoutConfig.maxRetries = contentGeneratorConfig.maxRetries;
    }
    // Check if using OpenRouter and add required headers
    const isOpenRouter = baseURL.includes('openrouter.ai');
    const defaultHeaders = isOpenRouter
      ? {
          'HTTP-Referer': 'https://github.com/QwenLM/qwen-code.git',
          'X-Title': 'Qwen Code',
        }
      : undefined;
    this.client = this.createClient(currentApiKey, baseURL, timeoutConfig, defaultHeaders);
  }

  /**
   * Handle rate limit error and attempt key rotation
   */
  async handleRateLimitWithRotation(error) {
    if (!this.rotationManager) {
      return false; // No rotation available
    }
    if (!ApiKeyRotationManager.isRateLimitError(error)) {
      return false; // Not a rate limit error
    }
    console.log('Rate limit detected, attempting API key rotation...');
    try {
      const oldApiKey = this.getCurrentApiKey();
      console.log(`🔑 Current key before rotation: ...${oldApiKey?.slice(-4) || 'unknown'}`);
      const newApiKey = await this.rotationManager.handleRateLimit(error);
      console.log(`🔑 New key after rotation: ...${newApiKey?.slice(-4) || 'unknown'}`);
      if (newApiKey && newApiKey !== oldApiKey) {
        // Recreate client with new key
        this.recreateClientWithCurrentKey();
        console.log('✅ Successfully rotated to new API key');
        return true;
      } else {
        console.log('⚠️ No new key available or same key returned');
      }
    } catch (rotationError) {
      console.error('❌ Failed to rotate API key:', rotationError);
    }
    return false;
  }

  /**
   * Check if an error is a timeout error
   */
  isTimeoutError(error) {
    if (!error) return false;
    const errorMessage = error instanceof Error
      ? error.message.toLowerCase()
      : String(error).toLowerCase();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const errorCode = error?.code;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const errorType = error?.type;
    // Check for common timeout indicators
    return (errorMessage.includes('timeout') ||
      errorMessage.includes('timed out') ||
      errorMessage.includes('connection timeout') ||
      errorMessage.includes('request timeout') ||
      errorMessage.includes('read timeout') ||
      errorMessage.includes('etimedout') || // Include ETIMEDOUT in message check
      errorMessage.includes('esockettimedout') || // Include ESOCKETTIMEDOUT in message check
      errorCode === 'ETIMEDOUT' ||
      errorCode === 'ESOCKETTIMEDOUT' ||
      errorType === 'timeout' ||
      // OpenAI specific timeout indicators
      errorMessage.includes('request timed out') ||
      errorMessage.includes('deadline exceeded'));
  }

  async generateContent(request) {
    const startTime = Date.now();
    const messages = this.convertToOpenAIFormat(request);
    // Track usage for current API key
    if (this.rotationManager) {
      const currentKey = this.getCurrentApiKey();
      if (currentKey) {
        await this.rotationManager.trackUsage(currentKey);
      }
    }
    // Retry logic with API key rotation (reduced for faster failure)
    const maxRetries = 2; // Only 2 attempts total for faster rotation
    let lastError;
    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        // Ensure we have the current API key for this attempt
        if (attempt > 0) {
          this.recreateClientWithCurrentKey();
        }
        // Build sampling parameters with clear priority:
        // 1. Request-level parameters (highest priority)
        // 2. Config-level sampling parameters (medium priority)
        // 3. Default values (lowest priority)
        const samplingParams = this.buildSamplingParameters(request);
        const createParams = {
          model: this.model,
          messages,
          ...samplingParams,
        };
        if (request.config?.tools) {
          createParams.tools = await this.convertGeminiToolsToOpenAI(request.config.tools);
        }
        // console.log('createParams', createParams);
        const completion = (await this.client.chat.completions.create(createParams));
        const response = this.convertToGeminiFormat(completion);
        const durationMs = Date.now() - startTime;
        // Log API response event for UI telemetry
        const responseEvent = new ApiResponseEvent(
          this.model,
          durationMs,
          `openai-${Date.now()}`, // Generate a prompt ID
          this.config.getContentGeneratorConfig()?.authType,
          response.usageMetadata,
        );
        logApiResponse(this.config, responseEvent);
        // Log interaction if enabled
        if (this.config.getContentGeneratorConfig()?.enableOpenAILogging) {
          const openaiRequest = await this.convertGeminiRequestToOpenAI(request);
          const openaiResponse = this.convertGeminiResponseToOpenAI(response);
          await openaiLogger.logInteraction(openaiRequest, openaiResponse);
        }
        return response;
      } catch (error) {
        lastError = error;
        // Try to handle rate limit with rotation
        const rotationSuccessful = await this.handleRateLimitWithRotation(error);
        if (rotationSuccessful && attempt < maxRetries - 1) {
          console.log(`Retrying request with new API key (attempt ${attempt + 2}/${maxRetries})`);
          continue; // Retry with new key
        }
        // If this is the last attempt or rotation failed, break out of retry loop
        if (attempt === maxRetries - 1 || !rotationSuccessful) {
          break;
        }
      }
    }
    // If we get here, all retries failed
    if (lastError) {
      const durationMs = Date.now() - startTime;
      // Identify timeout errors specifically
      const isTimeoutError = this.isTimeoutError(lastError);
      const errorMessage = isTimeoutError
        ? `Request timeout after ${Math.round(durationMs / 1000)}s. Try reducing input length or increasing timeout in config.`
        : lastError instanceof Error
          ? lastError.message
          : String(lastError);
      // Estimate token usage even when there's an error
      // This helps track costs and usage even for failed requests
      let estimatedUsage;
      try {
        const tokenCountResult = await this.countTokens({
          contents: request.contents,
          model: this.model,
        });
        estimatedUsage = {
          promptTokenCount: tokenCountResult.totalTokens,
          candidatesTokenCount: 0, // No completion tokens since request failed
          totalTokenCount: tokenCountResult.totalTokens,
        };
      } catch {
        // If token counting also fails, provide a minimal estimate
        const contentStr = JSON.stringify(request.contents);
        const estimatedTokens = Math.ceil(contentStr.length / 4);
        estimatedUsage = {
          promptTokenCount: estimatedTokens,
          candidatesTokenCount: 0,
          totalTokenCount: estimatedTokens,
        };
      }
      // Log API error event for UI telemetry with estimated usage
      const errorEvent = new ApiResponseEvent(
        this.model,
        durationMs,
        `openai-${Date.now()}`, // Generate a prompt ID
        this.config.getContentGeneratorConfig()?.authType,
        estimatedUsage,
        undefined,
        errorMessage,
      );
      logApiResponse(this.config, errorEvent);
      // Log error interaction if enabled
      if (this.config.getContentGeneratorConfig()?.enableOpenAILogging) {
        const openaiRequest = await this.convertGeminiRequestToOpenAI(request);
        await openaiLogger.logInteraction(openaiRequest, undefined, lastError);
      }
      console.error('OpenAI API Error:', errorMessage);
      // Provide helpful timeout-specific error message
      if (isTimeoutError) {
        throw new Error(`${errorMessage}\n\nTroubleshooting tips:\n` +
          `- Reduce input length or complexity\n` +
          `- Increase timeout in config: contentGenerator.timeout\n` +
          `- Check network connectivity\n` +
          `- Consider using streaming mode for long responses`);
      }
      throw new Error(`OpenAI API error: ${errorMessage}`);
    }
    // This should never be reached, but just in case
    throw new Error('Unknown error occurred during API request');
  }

  async generateContentStream(request) {
    const startTime = Date.now();
    const messages = this.convertToOpenAIFormat(request);
    // Track usage for current API key
    if (this.rotationManager) {
      const currentKey = this.getCurrentApiKey();
      if (currentKey) {
        await this.rotationManager.trackUsage(currentKey);
      }
    }
    // Retry logic with API key rotation for streaming
    const maxRetries = 2;
    let lastError;
    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        // Ensure we have the current API key for this attempt
        if (attempt > 0) {
          this.recreateClientWithCurrentKey();
        }
        // Build sampling parameters with clear priority
        const samplingParams = this.buildSamplingParameters(request);
        const createParams = {
          model: this.model,
          messages,
          ...samplingParams,
          stream: true,
          stream_options: { include_usage: true },
        };
        if (request.config?.tools) {
          createParams.tools = await this.convertGeminiToolsToOpenAI(request.config.tools);
        }
        // console.log('createParams', createParams);
        const stream = (await this.client.chat.completions.create(createParams));
        const originalStream = this.streamGenerator(stream);
        // Collect all responses for final logging (don't log during streaming)
        const responses = [];
        // Return a new generator that both yields responses and collects them
        const wrappedGenerator = async function* () {
          try {
            for await (const response of originalStream) {
              responses.push(response);
              yield response;
            }
            const durationMs = Date.now() - startTime;
            // Get final usage metadata from the last response that has it
            const finalUsageMetadata = responses
              .slice()
              .reverse()
              .find((r) => r.usageMetadata)?.usageMetadata;
            // Log API response event for UI telemetry
            const responseEvent = new ApiResponseEvent(
              this.model,
              durationMs,
              `openai-stream-${Date.now()}`, // Generate a prompt ID
              this.config.getContentGeneratorConfig()?.authType,
              finalUsageMetadata,
            );
            logApiResponse(this.config, responseEvent);
            // Log interaction if enabled (same as generateContent method)
            if (this.config.getContentGeneratorConfig()?.enableOpenAILogging) {
              const openaiRequest = await this.convertGeminiRequestToOpenAI(request);
              // For streaming, we combine all responses into a single response for logging
              const combinedResponse = this.combineStreamResponsesForLogging(responses);
              const openaiResponse = this.convertGeminiResponseToOpenAI(combinedResponse);
              await openaiLogger.logInteraction(openaiRequest, openaiResponse);
            }
          } catch (streamError) {
            const durationMs = Date.now() - startTime;
            // Identify timeout errors specifically for streaming
            const isTimeoutError = this.isTimeoutError(streamError);
            const errorMessage = isTimeoutError
              ? `Streaming request timeout after ${Math.round(durationMs / 1000)}s. Try reducing input length or increasing timeout in config.`
              : streamError instanceof Error
                ? streamError.message
                : String(streamError);
            // Estimate token usage even when there's an error in streaming
            let estimatedUsage;
            try {
              const tokenCountResult = await this.countTokens({
                contents: request.contents,
                model: this.model,
              });
              estimatedUsage = {
                promptTokenCount: tokenCountResult.totalTokens,
                candidatesTokenCount: 0, // No completion tokens since request failed
                totalTokenCount: tokenCountResult.totalTokens,
              };
            } catch {
              // If token counting also fails, provide a minimal estimate
              const contentStr = JSON.stringify(request.contents);
              const estimatedTokens = Math.ceil(contentStr.length / 4);
              estimatedUsage = {
                promptTokenCount: estimatedTokens,
                candidatesTokenCount: 0,
                totalTokenCount: estimatedTokens,
              };
            }
            // Log API error event for UI telemetry with estimated usage
            const errorEvent = new ApiResponseEvent(
              this.model,
              durationMs,
              `openai-stream-${Date.now()}`, // Generate a prompt ID
              this.config.getContentGeneratorConfig()?.authType,
              estimatedUsage,
              undefined,
              errorMessage,
            );
            logApiResponse(this.config, errorEvent);
            // Log error interaction if enabled
            if (this.config.getContentGeneratorConfig()?.enableOpenAILogging) {
              const openaiRequest = await this.convertGeminiRequestToOpenAI(request);
              await openaiLogger.logInteraction(openaiRequest, undefined, streamError);
            }
            // Provide helpful timeout-specific error message for streaming
            if (isTimeoutError) {
              throw new Error(`${errorMessage}\n\nStreaming timeout troubleshooting:\n` +
                `- Reduce input length or complexity\n` +
                `- Increase timeout in config: contentGenerator.timeout\n` +
                `- Check network stability for streaming connections\n` +
                `- Consider using non-streaming mode for very long inputs`);
            }
            throw streamError;
          }
        }.bind(this);
        return wrappedGenerator();
      } catch (error) {
        lastError = error;
        // Try to handle rate limit with rotation
        const rotationSuccessful = await this.handleRateLimitWithRotation(error);
        if (rotationSuccessful && attempt < maxRetries - 1) {
          console.log(`Retrying streaming request with new API key (attempt ${attempt + 2}/${maxRetries})`);
          continue; // Retry with new key
        }
        // If this is the last attempt or rotation failed, break out of retry loop
        if (attempt === maxRetries - 1 || !rotationSuccessful) {
          break;
        }
      }
    }
    // If we get here, all retries failed - handle the error
    if (lastError) {
      const durationMs = Date.now() - startTime;
      // Identify timeout errors specifically for streaming setup
      const isTimeoutError = this.isTimeoutError(lastError);
      const errorMessage = isTimeoutError
        ? `Streaming setup timeout after ${Math.round(durationMs / 1000)}s. Try reducing input length or increasing timeout in config.`
        : lastError instanceof Error
          ? lastError.message
          : String(lastError);
      // Estimate token usage even when there's an error in streaming setup
      let estimatedUsage;
      try {
        const tokenCountResult = await this.countTokens({
          contents: request.contents,
          model: this.model,
        });
        estimatedUsage = {
          promptTokenCount: tokenCountResult.totalTokens,
          candidatesTokenCount: 0, // No completion tokens since request failed
          totalTokenCount: tokenCountResult.totalTokens,
        };
      } catch {
        // If token counting also fails, provide a minimal estimate
        const contentStr = JSON.stringify(request.contents);
        const estimatedTokens = Math.ceil(contentStr.length / 4);
        estimatedUsage = {
          promptTokenCount: estimatedTokens,
          candidatesTokenCount: 0,
          totalTokenCount: estimatedTokens,
        };
      }
      // Log API error event for UI telemetry with estimated usage
      const errorEvent = new ApiResponseEvent(
        this.model,
        durationMs,
        `openai-stream-${Date.now()}`, // Generate a prompt ID
        this.config.getContentGeneratorConfig()?.authType,
        estimatedUsage,
        undefined,
        errorMessage,
      );
      logApiResponse(this.config, errorEvent);
      console.error('OpenAI API Streaming Error:', errorMessage);
      // Provide helpful timeout-specific error message for streaming setup
      if (isTimeoutError) {
        throw new Error(`${errorMessage}\n\nStreaming setup timeout troubleshooting:\n` +
          `- Reduce input length or complexity\n` +
          `- Increase timeout in config: contentGenerator.timeout\n` +
          `- Check network connectivity and firewall settings\n` +
          `- Consider using non-streaming mode for very long inputs`);
      }
      throw new Error(`OpenAI API error: ${errorMessage}`);
    }
    // This should never be reached, but just in case
    throw new Error('Unknown error occurred during streaming API request');
  }

  async *streamGenerator(stream) {
    // Reset the accumulator for each new stream
    this.streamingToolCalls.clear();
    for await (const chunk of stream) {
      yield this.convertStreamChunkToGeminiFormat(chunk);
    }
  }

  /**
   * Combine streaming responses for logging purposes
   */
  combineStreamResponsesForLogging(responses) {
    if (responses.length === 0) {
      return new GenerateContentResponse();
    }
    const lastResponse = responses[responses.length - 1];
    // Find the last response with usage metadata
    const finalUsageMetadata = responses
      .slice()
      .reverse()
      .find((r) => r.usageMetadata)?.usageMetadata;
    // Combine all text content from the stream
    const combinedParts = [];
    let combinedText = '';
    const functionCalls = [];
    for (const response of responses) {
      if (response.candidates?.[0]?.content?.parts) {
        for (const part of response.candidates[0].content.parts) {
          if ('text' in part && part.text) {
            combinedText += part.text;
          } else if ('functionCall' in part && part.functionCall) {
            functionCalls.push(part);
          }
        }
      }
    }
    // Add combined text if any
    if (combinedText) {
      combinedParts.push({ text: combinedText });
    }
    // Add function calls
    combinedParts.push(...functionCalls);
    // Create combined response
    const combinedResponse = new GenerateContentResponse();
    combinedResponse.candidates = [
      {
        content: {
          parts: combinedParts,
          role: 'model',
        },
        finishReason: responses[responses.length - 1]?.candidates?.[0]?.finishReason ||
          FinishReason.FINISH_REASON_UNSPECIFIED,
        index: 0,
        safetyRatings: [],
      },
    ];
    combinedResponse.responseId = lastResponse?.responseId;
    combinedResponse.createTime = lastResponse?.createTime;
    combinedResponse.modelVersion = this.model;
    combinedResponse.promptFeedback = { safetyRatings: [] };
    combinedResponse.usageMetadata = finalUsageMetadata;
    return combinedResponse;
  }

  async countTokens(request) {
    // Use tiktoken for accurate token counting
    const content = JSON.stringify(request.contents);
    let totalTokens = 0;
    try {
      const { get_encoding } = await import('tiktoken');
      const encoding = get_encoding('cl100k_base'); // GPT-4 encoding, but estimate for qwen
      totalTokens = encoding.encode(content).length;
      encoding.free();
    } catch (error) {
      console.warn('Failed to load tiktoken, falling back to character approximation:', error);
      // Fallback: rough approximation using character count
      totalTokens = Math.ceil(content.length / 4); // Rough estimate: 1 token ≈ 4 characters
    }
    return {
      totalTokens,
    };
  }

  async embedContent(request) {
    // Extract text from contents
    let text = '';
    if (Array.isArray(request.contents)) {
      text = request.contents
        .map((content) => {
          if (typeof content === 'string') return content;
          if ('parts' in content && content.parts) {
            return content.parts
              .map((part) => typeof part === 'string' ? part : 'text' in part ? part.text || '' : '')
              .join(' ');
          }
          return '';
        })
        .join(' ');
    } else if (request.contents) {
      if (typeof request.contents === 'string') {
        text = request.contents;
      } else if ('parts' in request.contents && request.contents.parts) {
        text = request.contents.parts
          .map((part) => typeof part === 'string' ? part : 'text' in part ? part.text : '')
          .join(' ');
      }
    }
    try {
      const embedding = await this.client.embeddings.create({
        model: 'text-embedding-ada-002', // Default embedding model
        input: text,
      });
      return {
        embeddings: [
          {
            values: embedding.data[0].embedding,
          },
        ],
      };
    } catch (error) {
      console.error('OpenAI API Embedding Error:', error);
      throw new Error(`OpenAI API error: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  convertGeminiParametersToOpenAI(parameters) {
    if (!parameters || typeof parameters !== 'object') {
      return parameters;
    }
    const converted = JSON.parse(JSON.stringify(parameters));
    const convertTypes = (obj) => {
      if (typeof obj !== 'object' || obj === null) {
        return obj;
      }
      if (Array.isArray(obj)) {
        return obj.map(convertTypes);
      }
      const result = {};
      for (const [key, value] of Object.entries(obj)) {
        if (key === 'type' && typeof value === 'string') {
          // Convert Gemini types to OpenAI JSON Schema types
          const lowerValue = value.toLowerCase();
          if (lowerValue === 'integer') {
            result[key] = 'integer';
          } else if (lowerValue === 'number') {
            result[key] = 'number';
          } else {
            result[key] = lowerValue;
          }
        } else if (key === 'minimum' || key === 'maximum' || key === 'multipleOf') {
          // Ensure numeric constraints are actual numbers, not strings
          if (typeof value === 'string' && !isNaN(Number(value))) {
            result[key] = Number(value);
          } else {
            result[key] = value;
          }
        } else if (key === 'minLength' || key === 'maxLength' || key === 'minItems' || key === 'maxItems') {
          // Ensure length constraints are integers, not strings
          if (typeof value === 'string' && !isNaN(Number(value))) {
            result[key] = parseInt(value, 10);
          } else {
            result[key] = value;
          }
        } else if (typeof value === 'object') {
          result[key] = convertTypes(value);
        } else {
          result[key] = value;
        }
      }
      return result;
    };
    return convertTypes(converted);
  }

  async convertGeminiToolsToOpenAI(geminiTools) {
    const openAITools = [];
    for (const tool of geminiTools) {
      let actualTool;
      // Handle CallableTool vs Tool
      if ('tool' in tool) {
        // This is a CallableTool
        actualTool = await tool.tool();
      } else {
        // This is already a Tool
        actualTool = tool;
      }
      if (actualTool.functionDeclarations) {
        for (const func of actualTool.functionDeclarations) {
          if (func.name && func.description) {
            openAITools.push({
              type: 'function',
              function: {
                name: func.name,
                description: func.description,
                parameters: this.convertGeminiParametersToOpenAI((func.parameters || {})),
              },
            });
          }
        }
      }
    }
    // console.log(
    //   'OpenAI Tools Parameters:',
    //   JSON.stringify(openAITools, null, 2),
    // );
    return openAITools;
  }

  convertToOpenAIFormat(request) {
    const messages = [];
    // Handle system instruction from config
    if (request.config?.systemInstruction) {
      const systemInstruction = request.config.systemInstruction;
      let systemText = '';
      if (Array.isArray(systemInstruction)) {
        systemText = systemInstruction
          .map((content) => {
            if (typeof content === 'string') return content;
            if ('parts' in content) {
              const contentObj = content;
              return (contentObj.parts
                ?.map((p) => typeof p === 'string' ? p : 'text' in p ? p.text : '')
                .join('\n') || '');
            }
            return '';
          })
          .join('\n');
      } else if (typeof systemInstruction === 'string') {
        systemText = systemInstruction;
      } else if (typeof systemInstruction === 'object' && 'parts' in systemInstruction) {
        const systemContent = systemInstruction;
        systemText = systemContent.parts
          ?.map((p) => typeof p === 'string' ? p : 'text' in p ? p.text : '')
          .join('\n') || '';
      }
      if (systemText) {
        messages.push({
          role: 'system',
          content: systemText,
        });
      }
    }
    // Handle contents
    if (Array.isArray(request.contents)) {
      for (const content of request.contents) {
        if (typeof content === 'string') {
          messages.push({ role: 'user', content });
        } else if ('role' in content && 'parts' in content) {
          // Check if this content has function calls or responses
          const functionCalls = [];
          const functionResponses = [];
          const textParts = [];
          for (const part of content.parts || []) {
            if (typeof part === 'string') {
              textParts.push(part);
            } else if ('text' in part && part.text) {
              textParts.push(part.text);
            } else if ('functionCall' in part && part.functionCall) {
              functionCalls.push(part.functionCall);
            } else if ('functionResponse' in part && part.functionResponse) {
              functionResponses.push(part.functionResponse);
            }
          }
          // Handle function responses (tool results)
          if (functionResponses.length > 0) {
            for (const funcResponse of functionResponses) {
              messages.push({
                role: 'tool',
                tool_call_id: funcResponse.id || '',
                content: typeof funcResponse.response === 'string'
                  ? funcResponse.response
                  : JSON.stringify(funcResponse.response),
              });
            }
          }
          // Handle model messages with function calls
          else if (content.role === 'model' && functionCalls.length > 0) {
            const toolCalls = functionCalls.map((fc, index) => ({
              id: fc.id || `call_${index}`,
              type: 'function',
              function: {
                name: fc.name || '',
                arguments: JSON.stringify(fc.args || {}),
              },
            }));
            messages.push({
              role: 'assistant',
              content: textParts.join('\n') || null,
              tool_calls: toolCalls,
            });
          }
          // Handle regular text messages
          else {
            const role = content.role === 'model' ? 'assistant' : 'user';
            const text = textParts.join('\n');
            if (text) {
              messages.push({ role, content: text });
            }
          }
        }
      }
    } else if (request.contents) {
      if (typeof request.contents === 'string') {
        messages.push({ role: 'user', content: request.contents });
      } else if ('role' in request.contents && 'parts' in request.contents) {
        const content = request.contents;
        const role = content.role === 'model' ? 'assistant' : 'user';
        const text = content.parts
          ?.map((p) => typeof p === 'string' ? p : 'text' in p ? p.text : '')
          .join('\n') || '';
        messages.push({ role, content: text });
      }
    }
    // Clean up orphaned tool calls and merge consecutive assistant messages
    const cleanedMessages = this.cleanOrphanedToolCalls(messages);
    return this.mergeConsecutiveAssistantMessages(cleanedMessages);
  }

  /**
   * Clean up orphaned tool calls from message history to prevent OpenAI API errors
   */
  cleanOrphanedToolCalls(messages) {
    const cleaned = [];
    const toolCallIds = new Set();
    const toolResponseIds = new Set();
    // First pass: collect all tool call IDs and tool response IDs
    for (const message of messages) {
      if (message.role === 'assistant' && 'tool_calls' in message && message.tool_calls) {
        for (const toolCall of message.tool_calls) {
          if (toolCall.id) {
            toolCallIds.add(toolCall.id);
          }
        }
      } else if (message.role === 'tool' && 'tool_call_id' in message && message.tool_call_id) {
        toolResponseIds.add(message.tool_call_id);
      }
    }
    // Second pass: filter out orphaned messages
    for (const message of messages) {
      if (message.role === 'assistant' && 'tool_calls' in message && message.tool_calls) {
        // Filter out tool calls that don't have corresponding responses
        const validToolCalls = message.tool_calls.filter((toolCall) => toolCall.id && toolResponseIds.has(toolCall.id));
        if (validToolCalls.length > 0) {
          // Keep the message but only with valid tool calls
          const cleanedMessage = { ...message };
          cleanedMessage.tool_calls = validToolCalls;
          cleaned.push(cleanedMessage);
        } else if (typeof message.content === 'string' && message.content.trim()) {
          // Keep the message if it has text content, but remove tool calls
          const cleanedMessage = { ...message };
          delete cleanedMessage.tool_calls;
          cleaned.push(cleanedMessage);
        }
        // If no valid tool calls and no content, skip the message entirely
      } else if (message.role === 'tool' && 'tool_call_id' in message && message.tool_call_id) {
        // Only keep tool responses that have corresponding tool calls
        if (toolCallIds.has(message.tool_call_id)) {
          cleaned.push(message);
        }
      } else {
        // Keep all other messages as-is
        cleaned.push(message);
      }
    }
    // Final validation: ensure every assistant message with tool_calls has corresponding tool responses
    const finalCleaned = [];
    const finalToolCallIds = new Set();
    // Collect all remaining tool call IDs
    for (const message of cleaned) {
      if (message.role === 'assistant' && 'tool_calls' in message && message.tool_calls) {
        for (const toolCall of message.tool_calls) {
          if (toolCall.id) {
            finalToolCallIds.add(toolCall.id);
          }
        }
      }
    }
    // Verify all tool calls have responses
    const finalToolResponseIds = new Set();
    for (const message of cleaned) {
      if (message.role === 'tool' && 'tool_call_id' in message && message.tool_call_id) {
        finalToolResponseIds.add(message.tool_call_id);
      }
    }
    // Remove any remaining orphaned tool calls
    for (const message of cleaned) {
      if (message.role === 'assistant' && 'tool_calls' in message && message.tool_calls) {
        const finalValidToolCalls = message.tool_calls.filter((toolCall) => toolCall.id && finalToolResponseIds.has(toolCall.id));
        if (finalValidToolCalls.length > 0) {
          const cleanedMessage = { ...message };
          cleanedMessage.tool_calls = finalValidToolCalls;
          finalCleaned.push(cleanedMessage);
        } else if (typeof message.content === 'string' && message.content.trim()) {
          const cleanedMessage = { ...message };
          delete cleanedMessage.tool_calls;
          finalCleaned.push(cleanedMessage);
        }
      } else {
        finalCleaned.push(message);
      }
    }
    return finalCleaned;
  }

  /**
   * Merge consecutive assistant messages to combine split text and tool calls
   */
  mergeConsecutiveAssistantMessages(messages) {
    const merged = [];
    for (const message of messages) {
      if (message.role === 'assistant' && merged.length > 0) {
        const lastMessage = merged[merged.length - 1];
        // If the last message is also an assistant message, merge them
        if (lastMessage.role === 'assistant') {
          // Combine content
          const combinedContent = [
            typeof lastMessage.content === 'string' ? lastMessage.content : '',
            typeof message.content === 'string' ? message.content : '',
          ]
            .filter(Boolean)
            .join('');
          // Combine tool calls
          const lastToolCalls = 'tool_calls' in lastMessage ? lastMessage.tool_calls || [] : [];
          const currentToolCalls = 'tool_calls' in message ? message.tool_calls || [] : [];
          const combinedToolCalls = [...lastToolCalls, ...currentToolCalls];
          // Update the last message with combined data
          lastMessage.content = combinedContent || null;
          if (combinedToolCalls.length > 0) {
            lastMessage.tool_calls = combinedToolCalls;
          }
          continue; // Skip adding the current message since it's been merged
        }
      }
      // Add the message as-is if no merging is needed
      merged.push(message);
    }
    return merged;
  }

  convertToGeminiFormat(openaiResponse) {
    const choice = openaiResponse.choices[0];
    const response = new GenerateContentResponse();
    const parts = [];
    // Handle text content
    if (choice.message.content) {
      parts.push({ text: choice.message.content });
    }
    // Handle tool calls
    if (choice.message.tool_calls) {
      for (const toolCall of choice.message.tool_calls) {
        if (toolCall.function) {
          let args = {};
          if (toolCall.function.arguments) {
            try {
              args = JSON.parse(toolCall.function.arguments);
            } catch (error) {
              console.error('Failed to parse function arguments:', error);
              args = {};
            }
          }
          parts.push({
            functionCall: {
              id: toolCall.id,
              name: toolCall.function.name,
              args,
            },
          });
        }
      }
    }
    response.responseId = openaiResponse.id;
    response.createTime = openaiResponse.created
      ? openaiResponse.created.toString()
      : new Date().getTime().toString();
    response.candidates = [
      {
        content: {
          parts,
          role: 'model',
        },
        finishReason: this.mapFinishReason(choice.finish_reason || 'stop'),
        index: 0,
        safetyRatings: [],
      },
    ];
    response.modelVersion = this.model;
    response.promptFeedback = { safetyRatings: [] };
    // Add usage metadata if available
    if (openaiResponse.usage) {
      const usage = openaiResponse.usage;
      const promptTokens = usage.prompt_tokens || 0;
      const completionTokens = usage.completion_tokens || 0;
      const totalTokens = usage.total_tokens || 0;
      const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
      // If we only have total tokens but no breakdown, estimate the split
      // Typically input is ~70% and output is ~30% for most conversations
      let finalPromptTokens = promptTokens;
      let finalCompletionTokens = completionTokens;
      if (totalTokens > 0 && promptTokens === 0 && completionTokens === 0) {
        // Estimate: assume 70% input, 30% output
        finalPromptTokens = Math.round(totalTokens * 0.7);
        finalCompletionTokens = Math.round(totalTokens * 0.3);
      }
      response.usageMetadata = {
        promptTokenCount: finalPromptTokens,
        candidatesTokenCount: finalCompletionTokens,
        totalTokenCount: totalTokens,
        cachedContentTokenCount: cachedTokens,
      };
    }
    return response;
  }

  convertStreamChunkToGeminiFormat(chunk) {
    const choice = chunk.choices?.[0];
    const response = new GenerateContentResponse();
    if (choice) {
      const parts = [];
      // Handle text content
      if (choice.delta?.content) {
        parts.push({ text: choice.delta.content });
      }
      // Handle tool calls - only accumulate during streaming, emit when complete
      if (choice.delta?.tool_calls) {
        for (const toolCall of choice.delta.tool_calls) {
          const index = toolCall.index ?? 0;
          // Get or create the tool call accumulator for this index
          let accumulatedCall = this.streamingToolCalls.get(index);
          if (!accumulatedCall) {
            accumulatedCall = { arguments: '' };
            this.streamingToolCalls.set(index, accumulatedCall);
          }
          // Update accumulated data
          if (toolCall.id) {
            accumulatedCall.id = toolCall.id;
          }
          if (toolCall.function?.name) {
            accumulatedCall.name = toolCall.function.name;
          }
          if (toolCall.function?.arguments) {
            accumulatedCall.arguments += toolCall.function.arguments;
          }
        }
      }
      // Only emit function calls when streaming is complete (finish_reason is present)
      if (choice.finish_reason) {
        for (const [, accumulatedCall] of this.streamingToolCalls) {
          // TODO: Add back id once we have a way to generate tool_call_id from the VLLM parser.
          // if (accumulatedCall.id && accumulatedCall.name) {
          if (accumulatedCall.name) {
            let args = {};
            if (accumulatedCall.arguments) {
              try {
                args = JSON.parse(accumulatedCall.arguments);
              } catch (error) {