buroventures-harald-code-core
Harald Code Core - Core functionality for AI-powered coding assistant
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { GenerateContentResponse, FinishReason, } from '@google/genai';
import OpenAI from 'openai';
import { logApiResponse } from '../telemetry/loggers.js';
import { ApiResponseEvent } from '../telemetry/types.js';
import { openaiLogger } from '../utils/openaiLogger.js';
import { ApiKeyRotationManager } from './apiKeyRotationManager.js';
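/*
 * Hypothetical usage sketch (names and the model id are illustrative, not part of this module):
 *
 *   const generator = new OpenAIContentGenerator(
 *     process.env.CEREBRAS_API_KEY,   // or any OpenAI-compatible API key
 *     'qwen3-coder',                  // any model id accepted by the configured endpoint
 *     config,                         // object exposing getContentGeneratorConfig()
 *     rotationManager,                // optional ApiKeyRotationManager instance
 *   );
 *   const response = await generator.generateContent({
 *     contents: [{ role: 'user', parts: [{ text: 'Hello' }] }],
 *   });
 */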
export class OpenAIContentGenerator {
client;
model;
config;
rotationManager;
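// Accumulates partial tool-call deltas by stream index; flushed into functionCall parts once a finish_reason arrives.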
streamingToolCalls = new Map();
constructor(apiKey, model, config, rotationManager) {
this.model = model;
this.config = config;
this.rotationManager = rotationManager;
const baseURL = process.env.CEREBRAS_BASE_URL || process.env.OPENAI_BASE_URL || 'https://api.cerebras.ai/v1';
// Configure timeout settings - using progressive timeouts
const timeoutConfig = {
// Base timeout for most requests (10 seconds - very fast failure for rate limits)
timeout: 10000,
// Maximum retries for failed requests (no retries at client level)
maxRetries: 0,
// HTTP client options
httpAgent: undefined, // Let the client use default agent
};
// Allow config to override timeout settings
const contentGeneratorConfig = this.config.getContentGeneratorConfig();
if (contentGeneratorConfig?.timeout) {
timeoutConfig.timeout = contentGeneratorConfig.timeout;
}
if (contentGeneratorConfig?.maxRetries !== undefined) {
timeoutConfig.maxRetries = contentGeneratorConfig.maxRetries;
}
// Check if using OpenRouter and add required headers
const isOpenRouter = baseURL.includes('openrouter.ai');
const defaultHeaders = isOpenRouter
? {
'HTTP-Referer': 'https://github.com/QwenLM/qwen-code.git',
'X-Title': 'Qwen Code',
}
: undefined;
this.client = this.createClient(apiKey, baseURL, timeoutConfig, defaultHeaders);
}
/**
* Create OpenAI client with given configuration
*/
createClient(apiKey, baseURL, timeoutConfig, defaultHeaders) {
return new OpenAI({
apiKey,
baseURL,
timeout: timeoutConfig.timeout,
maxRetries: timeoutConfig.maxRetries,
defaultHeaders,
});
}
/**
* Get current API key (from rotation manager or fallback)
*/
getCurrentApiKey() {
if (this.rotationManager) {
const rotatedKey = this.rotationManager.getCurrentApiKey();
if (rotatedKey) {
return rotatedKey;
}
}
// Fallback to environment variables if no rotation manager or no keys
return process.env.CEREBRAS_API_KEY || process.env.OPENAI_API_KEY || '';
}
/**
* Recreate client with current API key
*/
recreateClientWithCurrentKey() {
const currentApiKey = this.getCurrentApiKey();
if (!currentApiKey) {
throw new Error('No API key available for OpenAI client');
}
console.log(`🔄 Recreating OpenAI client with key: ...${currentApiKey.slice(-4)}`);
const baseURL = process.env.CEREBRAS_BASE_URL || process.env.OPENAI_BASE_URL || 'https://api.cerebras.ai/v1';
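// Note: the defaults below (120s timeout, 3 retries) are more lenient than the constructor's (10s, 0 retries); config overrides apply to both paths.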
// Configure timeout settings
const timeoutConfig = {
timeout: 120000,
maxRetries: 3,
};
const contentGeneratorConfig = this.config.getContentGeneratorConfig();
if (contentGeneratorConfig?.timeout) {
timeoutConfig.timeout = contentGeneratorConfig.timeout;
}
if (contentGeneratorConfig?.maxRetries !== undefined) {
timeoutConfig.maxRetries = contentGeneratorConfig.maxRetries;
}
// Check if using OpenRouter and add required headers
const isOpenRouter = baseURL.includes('openrouter.ai');
const defaultHeaders = isOpenRouter
? {
'HTTP-Referer': 'https://github.com/QwenLM/qwen-code.git',
'X-Title': 'Qwen Code',
}
: undefined;
this.client = this.createClient(currentApiKey, baseURL, timeoutConfig, defaultHeaders);
}
/**
* Handle rate limit error and attempt key rotation
*/
async handleRateLimitWithRotation(error) {
if (!this.rotationManager) {
return false; // No rotation available
}
if (!ApiKeyRotationManager.isRateLimitError(error)) {
return false; // Not a rate limit error
}
console.log('Rate limit detected, attempting API key rotation...');
try {
const oldApiKey = this.getCurrentApiKey();
console.log(`🔑 Current key before rotation: ...${oldApiKey?.slice(-4) || 'unknown'}`);
const newApiKey = await this.rotationManager.handleRateLimit(error);
console.log(`🔑 New key after rotation: ...${newApiKey?.slice(-4) || 'unknown'}`);
if (newApiKey && newApiKey !== oldApiKey) {
// Recreate client with new key
this.recreateClientWithCurrentKey();
console.log('✅ Successfully rotated to new API key');
return true;
}
else {
console.log('⚠️ No new key available or same key returned');
}
}
catch (rotationError) {
console.error('❌ Failed to rotate API key:', rotationError);
}
return false;
}
/**
* Check if an error is a timeout error
*/
isTimeoutError(error) {
if (!error)
return false;
const errorMessage = error instanceof Error
? error.message.toLowerCase()
: String(error).toLowerCase();
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const errorCode = error?.code;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const errorType = error?.type;
// Check for common timeout indicators
return (errorMessage.includes('timeout') ||
errorMessage.includes('timed out') ||
errorMessage.includes('connection timeout') ||
errorMessage.includes('request timeout') ||
errorMessage.includes('read timeout') ||
errorMessage.includes('etimedout') || // Include ETIMEDOUT in message check
errorMessage.includes('esockettimedout') || // Include ESOCKETTIMEDOUT in message check
errorCode === 'ETIMEDOUT' ||
errorCode === 'ESOCKETTIMEDOUT' ||
errorType === 'timeout' ||
// OpenAI specific timeout indicators
errorMessage.includes('request timed out') ||
errorMessage.includes('deadline exceeded'));
}
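/**
 * Generate a single (non-streaming) completion. Tracks API key usage, retries once after a
 * successful key rotation on rate-limit errors, and logs telemetry for both success and failure.
 */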
async generateContent(request) {
const startTime = Date.now();
const messages = this.convertToOpenAIFormat(request);
// Track usage for current API key
if (this.rotationManager) {
const currentKey = this.getCurrentApiKey();
if (currentKey) {
await this.rotationManager.trackUsage(currentKey);
}
}
// Retry logic with API key rotation (reduced for faster failure)
const maxRetries = 2; // Only 2 attempts total for faster rotation
let lastError;
for (let attempt = 0; attempt < maxRetries; attempt++) {
try {
// Ensure we have the current API key for this attempt
if (attempt > 0) {
this.recreateClientWithCurrentKey();
}
// Build sampling parameters with clear priority:
// 1. Request-level parameters (highest priority)
// 2. Config-level sampling parameters (medium priority)
// 3. Default values (lowest priority)
const samplingParams = this.buildSamplingParameters(request);
const createParams = {
model: this.model,
messages,
...samplingParams,
};
if (request.config?.tools) {
createParams.tools = await this.convertGeminiToolsToOpenAI(request.config.tools);
}
// console.log('createParams', createParams);
const completion = (await this.client.chat.completions.create(createParams));
const response = this.convertToGeminiFormat(completion);
const durationMs = Date.now() - startTime;
// Log API response event for UI telemetry
const responseEvent = new ApiResponseEvent(this.model, durationMs, `openai-${Date.now()}`, // Generate a prompt ID
this.config.getContentGeneratorConfig()?.authType, response.usageMetadata);
logApiResponse(this.config, responseEvent);
// Log interaction if enabled
if (this.config.getContentGeneratorConfig()?.enableOpenAILogging) {
const openaiRequest = await this.convertGeminiRequestToOpenAI(request);
const openaiResponse = this.convertGeminiResponseToOpenAI(response);
await openaiLogger.logInteraction(openaiRequest, openaiResponse);
}
return response;
}
catch (error) {
lastError = error;
// Try to handle rate limit with rotation
const rotationSuccessful = await this.handleRateLimitWithRotation(error);
if (rotationSuccessful && attempt < maxRetries - 1) {
console.log(`Retrying request with new API key (attempt ${attempt + 2}/${maxRetries})`);
continue; // Retry with new key
}
// If this is the last attempt or rotation failed, break out of retry loop
if (attempt === maxRetries - 1 || !rotationSuccessful) {
break;
}
}
}
// If we get here, all retries failed
if (lastError) {
const durationMs = Date.now() - startTime;
// Identify timeout errors specifically
const isTimeoutError = this.isTimeoutError(lastError);
const errorMessage = isTimeoutError
? `Request timeout after ${Math.round(durationMs / 1000)}s. Try reducing input length or increasing timeout in config.`
: lastError instanceof Error
? lastError.message
: String(lastError);
// Estimate token usage even when there's an error
// This helps track costs and usage even for failed requests
let estimatedUsage;
try {
const tokenCountResult = await this.countTokens({
contents: request.contents,
model: this.model,
});
estimatedUsage = {
promptTokenCount: tokenCountResult.totalTokens,
candidatesTokenCount: 0, // No completion tokens since request failed
totalTokenCount: tokenCountResult.totalTokens,
};
}
catch {
// If token counting also fails, provide a minimal estimate
const contentStr = JSON.stringify(request.contents);
const estimatedTokens = Math.ceil(contentStr.length / 4);
estimatedUsage = {
promptTokenCount: estimatedTokens,
candidatesTokenCount: 0,
totalTokenCount: estimatedTokens,
};
}
// Log API error event for UI telemetry with estimated usage
const errorEvent = new ApiResponseEvent(this.model, durationMs, `openai-${Date.now()}`, // Generate a prompt ID
this.config.getContentGeneratorConfig()?.authType, estimatedUsage, undefined, errorMessage);
logApiResponse(this.config, errorEvent);
// Log error interaction if enabled
if (this.config.getContentGeneratorConfig()?.enableOpenAILogging) {
const openaiRequest = await this.convertGeminiRequestToOpenAI(request);
await openaiLogger.logInteraction(openaiRequest, undefined, lastError);
}
console.error('OpenAI API Error:', errorMessage);
// Provide helpful timeout-specific error message
if (isTimeoutError) {
throw new Error(`${errorMessage}\n\nTroubleshooting tips:\n` +
`- Reduce input length or complexity\n` +
`- Increase timeout in config: contentGenerator.timeout\n` +
`- Check network connectivity\n` +
`- Consider using streaming mode for long responses`);
}
throw new Error(`OpenAI API error: ${errorMessage}`);
}
// This should never be reached, but just in case
throw new Error('Unknown error occurred during API request');
}
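/**
 * Generate a streaming completion. Uses the same rotation/retry handling as generateContent,
 * but wraps the stream so responses are collected and telemetry/logging happen once the
 * stream completes (or fails).
 */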
async generateContentStream(request) {
const startTime = Date.now();
const messages = this.convertToOpenAIFormat(request);
// Track usage for current API key
if (this.rotationManager) {
const currentKey = this.getCurrentApiKey();
if (currentKey) {
await this.rotationManager.trackUsage(currentKey);
}
}
// Retry logic with API key rotation for streaming
const maxRetries = 2;
let lastError;
for (let attempt = 0; attempt < maxRetries; attempt++) {
try {
// Ensure we have the current API key for this attempt
if (attempt > 0) {
this.recreateClientWithCurrentKey();
}
// Build sampling parameters with clear priority
const samplingParams = this.buildSamplingParameters(request);
const createParams = {
model: this.model,
messages,
...samplingParams,
stream: true,
stream_options: { include_usage: true },
};
if (request.config?.tools) {
createParams.tools = await this.convertGeminiToolsToOpenAI(request.config.tools);
}
// console.log('createParams', createParams);
const stream = (await this.client.chat.completions.create(createParams));
const originalStream = this.streamGenerator(stream);
// Collect all responses for final logging (don't log during streaming)
const responses = [];
// Return a new generator that both yields responses and collects them
const wrappedGenerator = async function* () {
try {
for await (const response of originalStream) {
responses.push(response);
yield response;
}
const durationMs = Date.now() - startTime;
// Get final usage metadata from the last response that has it
const finalUsageMetadata = responses
.slice()
.reverse()
.find((r) => r.usageMetadata)?.usageMetadata;
// Log API response event for UI telemetry
const responseEvent = new ApiResponseEvent(this.model, durationMs, `openai-stream-${Date.now()}`, // Generate a prompt ID
this.config.getContentGeneratorConfig()?.authType, finalUsageMetadata);
logApiResponse(this.config, responseEvent);
// Log interaction if enabled (same as generateContent method)
if (this.config.getContentGeneratorConfig()?.enableOpenAILogging) {
const openaiRequest = await this.convertGeminiRequestToOpenAI(request);
// For streaming, we combine all responses into a single response for logging
const combinedResponse = this.combineStreamResponsesForLogging(responses);
const openaiResponse = this.convertGeminiResponseToOpenAI(combinedResponse);
await openaiLogger.logInteraction(openaiRequest, openaiResponse);
}
}
catch (streamError) {
const durationMs = Date.now() - startTime;
// Identify timeout errors specifically for streaming
const isTimeoutError = this.isTimeoutError(streamError);
const errorMessage = isTimeoutError
? `Streaming request timeout after ${Math.round(durationMs / 1000)}s. Try reducing input length or increasing timeout in config.`
: streamError instanceof Error
? streamError.message
: String(streamError);
// Estimate token usage even when there's an error in streaming
let estimatedUsage;
try {
const tokenCountResult = await this.countTokens({
contents: request.contents,
model: this.model,
});
estimatedUsage = {
promptTokenCount: tokenCountResult.totalTokens,
candidatesTokenCount: 0, // No completion tokens since request failed
totalTokenCount: tokenCountResult.totalTokens,
};
}
catch {
// If token counting also fails, provide a minimal estimate
const contentStr = JSON.stringify(request.contents);
const estimatedTokens = Math.ceil(contentStr.length / 4);
estimatedUsage = {
promptTokenCount: estimatedTokens,
candidatesTokenCount: 0,
totalTokenCount: estimatedTokens,
};
}
// Log API error event for UI telemetry with estimated usage
const errorEvent = new ApiResponseEvent(this.model, durationMs, `openai-stream-${Date.now()}`, // Generate a prompt ID
this.config.getContentGeneratorConfig()?.authType, estimatedUsage, undefined, errorMessage);
logApiResponse(this.config, errorEvent);
// Log error interaction if enabled
if (this.config.getContentGeneratorConfig()?.enableOpenAILogging) {
const openaiRequest = await this.convertGeminiRequestToOpenAI(request);
await openaiLogger.logInteraction(openaiRequest, undefined, streamError);
}
// Provide helpful timeout-specific error message for streaming
if (isTimeoutError) {
throw new Error(`${errorMessage}\n\nStreaming timeout troubleshooting:\n` +
`- Reduce input length or complexity\n` +
`- Increase timeout in config: contentGenerator.timeout\n` +
`- Check network stability for streaming connections\n` +
`- Consider using non-streaming mode for very long inputs`);
}
throw streamError;
}
}.bind(this);
return wrappedGenerator();
}
catch (error) {
lastError = error;
// Try to handle rate limit with rotation
const rotationSuccessful = await this.handleRateLimitWithRotation(error);
if (rotationSuccessful && attempt < maxRetries - 1) {
console.log(`Retrying streaming request with new API key (attempt ${attempt + 2}/${maxRetries})`);
continue; // Retry with new key
}
// If this is the last attempt or rotation failed, break out of retry loop
if (attempt === maxRetries - 1 || !rotationSuccessful) {
break;
}
}
}
// If we get here, all retries failed - handle the error
if (lastError) {
const durationMs = Date.now() - startTime;
// Identify timeout errors specifically for streaming setup
const isTimeoutError = this.isTimeoutError(lastError);
const errorMessage = isTimeoutError
? `Streaming setup timeout after ${Math.round(durationMs / 1000)}s. Try reducing input length or increasing timeout in config.`
: lastError instanceof Error
? lastError.message
: String(lastError);
// Estimate token usage even when there's an error in streaming setup
let estimatedUsage;
try {
const tokenCountResult = await this.countTokens({
contents: request.contents,
model: this.model,
});
estimatedUsage = {
promptTokenCount: tokenCountResult.totalTokens,
candidatesTokenCount: 0, // No completion tokens since request failed
totalTokenCount: tokenCountResult.totalTokens,
};
}
catch {
// If token counting also fails, provide a minimal estimate
const contentStr = JSON.stringify(request.contents);
const estimatedTokens = Math.ceil(contentStr.length / 4);
estimatedUsage = {
promptTokenCount: estimatedTokens,
candidatesTokenCount: 0,
totalTokenCount: estimatedTokens,
};
}
// Log API error event for UI telemetry with estimated usage
const errorEvent = new ApiResponseEvent(this.model, durationMs, `openai-stream-${Date.now()}`, // Generate a prompt ID
this.config.getContentGeneratorConfig()?.authType, estimatedUsage, undefined, errorMessage);
logApiResponse(this.config, errorEvent);
console.error('OpenAI API Streaming Error:', errorMessage);
// Provide helpful timeout-specific error message for streaming setup
if (isTimeoutError) {
throw new Error(`${errorMessage}\n\nStreaming setup timeout troubleshooting:\n` +
`- Reduce input length or complexity\n` +
`- Increase timeout in config: contentGenerator.timeout\n` +
`- Check network connectivity and firewall settings\n` +
`- Consider using non-streaming mode for very long inputs`);
}
throw new Error(`OpenAI API error: ${errorMessage}`);
}
// This should never be reached, but just in case
throw new Error('Unknown error occurred during streaming API request');
}
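/**
 * Convert the raw OpenAI stream into Gemini-format responses, chunk by chunk.
 */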
async *streamGenerator(stream) {
// Reset the accumulator for each new stream
this.streamingToolCalls.clear();
for await (const chunk of stream) {
yield this.convertStreamChunkToGeminiFormat(chunk);
}
}
/**
* Combine streaming responses for logging purposes
*/
combineStreamResponsesForLogging(responses) {
if (responses.length === 0) {
return new GenerateContentResponse();
}
const lastResponse = responses[responses.length - 1];
// Find the last response with usage metadata
const finalUsageMetadata = responses
.slice()
.reverse()
.find((r) => r.usageMetadata)?.usageMetadata;
// Combine all text content from the stream
const combinedParts = [];
let combinedText = '';
const functionCalls = [];
for (const response of responses) {
if (response.candidates?.[0]?.content?.parts) {
for (const part of response.candidates[0].content.parts) {
if ('text' in part && part.text) {
combinedText += part.text;
}
else if ('functionCall' in part && part.functionCall) {
functionCalls.push(part);
}
}
}
}
// Add combined text if any
if (combinedText) {
combinedParts.push({ text: combinedText });
}
// Add function calls
combinedParts.push(...functionCalls);
// Create combined response
const combinedResponse = new GenerateContentResponse();
combinedResponse.candidates = [
{
content: {
parts: combinedParts,
role: 'model',
},
finishReason: responses[responses.length - 1]?.candidates?.[0]?.finishReason ||
FinishReason.FINISH_REASON_UNSPECIFIED,
index: 0,
safetyRatings: [],
},
];
combinedResponse.responseId = lastResponse?.responseId;
combinedResponse.createTime = lastResponse?.createTime;
combinedResponse.modelVersion = this.model;
combinedResponse.promptFeedback = { safetyRatings: [] };
combinedResponse.usageMetadata = finalUsageMetadata;
return combinedResponse;
}
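/**
 * Count tokens using tiktoken's cl100k_base encoding, falling back to a rough
 * 4-characters-per-token estimate if tiktoken cannot be loaded.
 */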
async countTokens(request) {
// Use tiktoken for accurate token counting
const content = JSON.stringify(request.contents);
let totalTokens = 0;
try {
const { get_encoding } = await import('tiktoken');
const encoding = get_encoding('cl100k_base'); // GPT-4 encoding, used here as an approximation for Qwen models
totalTokens = encoding.encode(content).length;
encoding.free();
}
catch (error) {
console.warn('Failed to load tiktoken, falling back to character approximation:', error);
// Fallback: rough approximation using character count
totalTokens = Math.ceil(content.length / 4); // Rough estimate: 1 token ≈ 4 characters
}
return {
totalTokens,
};
}
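/**
 * Create an embedding for the request's text content via the OpenAI embeddings API.
 */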
async embedContent(request) {
// Extract text from contents
let text = '';
if (Array.isArray(request.contents)) {
text = request.contents
.map((content) => {
if (typeof content === 'string')
return content;
if ('parts' in content && content.parts) {
return content.parts
.map((part) => typeof part === 'string'
? part
: 'text' in part
? part.text || ''
: '')
.join(' ');
}
return '';
})
.join(' ');
}
else if (request.contents) {
if (typeof request.contents === 'string') {
text = request.contents;
}
else if ('parts' in request.contents && request.contents.parts) {
text = request.contents.parts
.map((part) => typeof part === 'string' ? part : 'text' in part ? part.text : '')
.join(' ');
}
}
try {
const embedding = await this.client.embeddings.create({
model: 'text-embedding-ada-002', // Default embedding model
input: text,
});
return {
embeddings: [
{
values: embedding.data[0].embedding,
},
],
};
}
catch (error) {
console.error('OpenAI API Embedding Error:', error);
throw new Error(`OpenAI API error: ${error instanceof Error ? error.message : String(error)}`);
}
}
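/**
 * Normalize a Gemini function-declaration parameter schema for OpenAI: lower-case type
 * names and coerce numeric/length constraints that arrive as strings into numbers.
 */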
convertGeminiParametersToOpenAI(parameters) {
if (!parameters || typeof parameters !== 'object') {
return parameters;
}
const converted = JSON.parse(JSON.stringify(parameters));
const convertTypes = (obj) => {
if (typeof obj !== 'object' || obj === null) {
return obj;
}
if (Array.isArray(obj)) {
return obj.map(convertTypes);
}
const result = {};
for (const [key, value] of Object.entries(obj)) {
if (key === 'type' && typeof value === 'string') {
// Convert Gemini types to OpenAI JSON Schema types
const lowerValue = value.toLowerCase();
if (lowerValue === 'integer') {
result[key] = 'integer';
}
else if (lowerValue === 'number') {
result[key] = 'number';
}
else {
result[key] = lowerValue;
}
}
else if (key === 'minimum' ||
key === 'maximum' ||
key === 'multipleOf') {
// Ensure numeric constraints are actual numbers, not strings
if (typeof value === 'string' && !isNaN(Number(value))) {
result[key] = Number(value);
}
else {
result[key] = value;
}
}
else if (key === 'minLength' ||
key === 'maxLength' ||
key === 'minItems' ||
key === 'maxItems') {
// Ensure length constraints are integers, not strings
if (typeof value === 'string' && !isNaN(Number(value))) {
result[key] = parseInt(value, 10);
}
else {
result[key] = value;
}
}
else if (typeof value === 'object') {
result[key] = convertTypes(value);
}
else {
result[key] = value;
}
}
return result;
};
return convertTypes(converted);
}
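/**
 * Convert Gemini tools (Tool or CallableTool) into OpenAI function-tool definitions.
 */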
async convertGeminiToolsToOpenAI(geminiTools) {
const openAITools = [];
for (const tool of geminiTools) {
let actualTool;
// Handle CallableTool vs Tool
if ('tool' in tool) {
// This is a CallableTool
actualTool = await tool.tool();
}
else {
// This is already a Tool
actualTool = tool;
}
if (actualTool.functionDeclarations) {
for (const func of actualTool.functionDeclarations) {
if (func.name && func.description) {
openAITools.push({
type: 'function',
function: {
name: func.name,
description: func.description,
parameters: this.convertGeminiParametersToOpenAI((func.parameters || {})),
},
});
}
}
}
}
// console.log(
// 'OpenAI Tools Parameters:',
// JSON.stringify(openAITools, null, 2),
// );
return openAITools;
}
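/**
 * Convert a Gemini request (system instruction, contents, function calls/responses) into an
 * OpenAI chat message array, then clean orphaned tool calls and merge consecutive assistant messages.
 */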
convertToOpenAIFormat(request) {
const messages = [];
// Handle system instruction from config
if (request.config?.systemInstruction) {
const systemInstruction = request.config.systemInstruction;
let systemText = '';
if (Array.isArray(systemInstruction)) {
systemText = systemInstruction
.map((content) => {
if (typeof content === 'string')
return content;
if ('parts' in content) {
const contentObj = content;
return (contentObj.parts
?.map((p) => typeof p === 'string' ? p : 'text' in p ? p.text : '')
.join('\n') || '');
}
return '';
})
.join('\n');
}
else if (typeof systemInstruction === 'string') {
systemText = systemInstruction;
}
else if (typeof systemInstruction === 'object' &&
'parts' in systemInstruction) {
const systemContent = systemInstruction;
systemText =
systemContent.parts
?.map((p) => typeof p === 'string' ? p : 'text' in p ? p.text : '')
.join('\n') || '';
}
if (systemText) {
messages.push({
role: 'system',
content: systemText,
});
}
}
// Handle contents
if (Array.isArray(request.contents)) {
for (const content of request.contents) {
if (typeof content === 'string') {
messages.push({ role: 'user', content });
}
else if ('role' in content && 'parts' in content) {
// Check if this content has function calls or responses
const functionCalls = [];
const functionResponses = [];
const textParts = [];
for (const part of content.parts || []) {
if (typeof part === 'string') {
textParts.push(part);
}
else if ('text' in part && part.text) {
textParts.push(part.text);
}
else if ('functionCall' in part && part.functionCall) {
functionCalls.push(part.functionCall);
}
else if ('functionResponse' in part && part.functionResponse) {
functionResponses.push(part.functionResponse);
}
}
// Handle function responses (tool results)
if (functionResponses.length > 0) {
for (const funcResponse of functionResponses) {
messages.push({
role: 'tool',
tool_call_id: funcResponse.id || '',
content: typeof funcResponse.response === 'string'
? funcResponse.response
: JSON.stringify(funcResponse.response),
});
}
}
// Handle model messages with function calls
else if (content.role === 'model' && functionCalls.length > 0) {
const toolCalls = functionCalls.map((fc, index) => ({
id: fc.id || `call_${index}`,
type: 'function',
function: {
name: fc.name || '',
arguments: JSON.stringify(fc.args || {}),
},
}));
messages.push({
role: 'assistant',
content: textParts.join('\n') || null,
tool_calls: toolCalls,
});
}
// Handle regular text messages
else {
const role = content.role === 'model'
? 'assistant'
: 'user';
const text = textParts.join('\n');
if (text) {
messages.push({ role, content: text });
}
}
}
}
}
else if (request.contents) {
if (typeof request.contents === 'string') {
messages.push({ role: 'user', content: request.contents });
}
else if ('role' in request.contents && 'parts' in request.contents) {
const content = request.contents;
const role = content.role === 'model' ? 'assistant' : 'user';
const text = content.parts
?.map((p) => typeof p === 'string' ? p : 'text' in p ? p.text : '')
.join('\n') || '';
messages.push({ role, content: text });
}
}
// Clean up orphaned tool calls and merge consecutive assistant messages
const cleanedMessages = this.cleanOrphanedToolCalls(messages);
return this.mergeConsecutiveAssistantMessages(cleanedMessages);
}
/**
* Clean up orphaned tool calls from message history to prevent OpenAI API errors
*/
cleanOrphanedToolCalls(messages) {
const cleaned = [];
const toolCallIds = new Set();
const toolResponseIds = new Set();
// First pass: collect all tool call IDs and tool response IDs
for (const message of messages) {
if (message.role === 'assistant' &&
'tool_calls' in message &&
message.tool_calls) {
for (const toolCall of message.tool_calls) {
if (toolCall.id) {
toolCallIds.add(toolCall.id);
}
}
}
else if (message.role === 'tool' &&
'tool_call_id' in message &&
message.tool_call_id) {
toolResponseIds.add(message.tool_call_id);
}
}
// Second pass: filter out orphaned messages
for (const message of messages) {
if (message.role === 'assistant' &&
'tool_calls' in message &&
message.tool_calls) {
// Filter out tool calls that don't have corresponding responses
const validToolCalls = message.tool_calls.filter((toolCall) => toolCall.id && toolResponseIds.has(toolCall.id));
if (validToolCalls.length > 0) {
// Keep the message but only with valid tool calls
const cleanedMessage = { ...message };
cleanedMessage.tool_calls = validToolCalls;
cleaned.push(cleanedMessage);
}
else if (typeof message.content === 'string' &&
message.content.trim()) {
// Keep the message if it has text content, but remove tool calls
const cleanedMessage = { ...message };
delete cleanedMessage.tool_calls;
cleaned.push(cleanedMessage);
}
// If no valid tool calls and no content, skip the message entirely
}
else if (message.role === 'tool' &&
'tool_call_id' in message &&
message.tool_call_id) {
// Only keep tool responses that have corresponding tool calls
if (toolCallIds.has(message.tool_call_id)) {
cleaned.push(message);
}
}
else {
// Keep all other messages as-is
cleaned.push(message);
}
}
// Final validation: ensure every assistant message with tool_calls has corresponding tool responses
const finalCleaned = [];
const finalToolCallIds = new Set();
// Collect all remaining tool call IDs
for (const message of cleaned) {
if (message.role === 'assistant' &&
'tool_calls' in message &&
message.tool_calls) {
for (const toolCall of message.tool_calls) {
if (toolCall.id) {
finalToolCallIds.add(toolCall.id);
}
}
}
}
// Verify all tool calls have responses
const finalToolResponseIds = new Set();
for (const message of cleaned) {
if (message.role === 'tool' &&
'tool_call_id' in message &&
message.tool_call_id) {
finalToolResponseIds.add(message.tool_call_id);
}
}
// Remove any remaining orphaned tool calls
for (const message of cleaned) {
if (message.role === 'assistant' &&
'tool_calls' in message &&
message.tool_calls) {
const finalValidToolCalls = message.tool_calls.filter((toolCall) => toolCall.id && finalToolResponseIds.has(toolCall.id));
if (finalValidToolCalls.length > 0) {
const cleanedMessage = { ...message };
cleanedMessage.tool_calls = finalValidToolCalls;
finalCleaned.push(cleanedMessage);
}
else if (typeof message.content === 'string' &&
message.content.trim()) {
const cleanedMessage = { ...message };
delete cleanedMessage.tool_calls;
finalCleaned.push(cleanedMessage);
}
}
else {
finalCleaned.push(message);
}
}
return finalCleaned;
}
/**
* Merge consecutive assistant messages to combine split text and tool calls
*/
mergeConsecutiveAssistantMessages(messages) {
const merged = [];
for (const message of messages) {
if (message.role === 'assistant' && merged.length > 0) {
const lastMessage = merged[merged.length - 1];
// If the last message is also an assistant message, merge them
if (lastMessage.role === 'assistant') {
// Combine content
const combinedContent = [
typeof lastMessage.content === 'string' ? lastMessage.content : '',
typeof message.content === 'string' ? message.content : '',
]
.filter(Boolean)
.join('');
// Combine tool calls
const lastToolCalls = 'tool_calls' in lastMessage ? lastMessage.tool_calls || [] : [];
const currentToolCalls = 'tool_calls' in message ? message.tool_calls || [] : [];
const combinedToolCalls = [...lastToolCalls, ...currentToolCalls];
// Update the last message with combined data
lastMessage.content = combinedContent || null;
if (combinedToolCalls.length > 0) {
lastMessage.tool_calls = combinedToolCalls;
}
continue; // Skip adding the current message since it's been merged
}
}
// Add the message as-is if no merging is needed
merged.push(message);
}
return merged;
}
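/**
 * Convert a non-streaming OpenAI chat completion into a Gemini GenerateContentResponse,
 * including tool calls and usage metadata.
 */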
convertToGeminiFormat(openaiResponse) {
const choice = openaiResponse.choices[0];
const response = new GenerateContentResponse();
const parts = [];
// Handle text content
if (choice.message.content) {
parts.push({ text: choice.message.content });
}
// Handle tool calls
if (choice.message.tool_calls) {
for (const toolCall of choice.message.tool_calls) {
if (toolCall.function) {
let args = {};
if (toolCall.function.arguments) {
try {
args = JSON.parse(toolCall.function.arguments);
}
catch (error) {
console.error('Failed to parse function arguments:', error);
args = {};
}
}
parts.push({
functionCall: {
id: toolCall.id,
name: toolCall.function.name,
args,
},
});
}
}
}
response.responseId = openaiResponse.id;
response.createTime = openaiResponse.created
? openaiResponse.created.toString()
: new Date().getTime().toString();
response.candidates = [
{
content: {
parts,
role: 'model',
},
finishReason: this.mapFinishReason(choice.finish_reason || 'stop'),
index: 0,
safetyRatings: [],
},
];
response.modelVersion = this.model;
response.promptFeedback = { safetyRatings: [] };
// Add usage metadata if available
if (openaiResponse.usage) {
const usage = openaiResponse.usage;
const promptTokens = usage.prompt_tokens || 0;
const completionTokens = usage.completion_tokens || 0;
const totalTokens = usage.total_tokens || 0;
const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
// If we only have total tokens but no breakdown, estimate the split
// Typically input is ~70% and output is ~30% for most conversations
let finalPromptTokens = promptTokens;
let finalCompletionTokens = completionTokens;
if (totalTokens > 0 && promptTokens === 0 && completionTokens === 0) {
// Estimate: assume 70% input, 30% output
finalPromptTokens = Math.round(totalTokens * 0.7);
finalCompletionTokens = Math.round(totalTokens * 0.3);
}
response.usageMetadata = {
promptTokenCount: finalPromptTokens,
candidatesTokenCount: finalCompletionTokens,
totalTokenCount: totalTokens,
cachedContentTokenCount: cachedTokens,
};
}
return response;
}
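/**
 * Convert a single streaming chunk into Gemini format. Tool-call deltas are accumulated in
 * streamingToolCalls and only emitted as functionCall parts when a finish_reason is present.
 */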
convertStreamChunkToGeminiFormat(chunk) {
const choice = chunk.choices?.[0];
const response = new GenerateContentResponse();
if (choice) {
const parts = [];
// Handle text content
if (choice.delta?.content) {
parts.push({ text: choice.delta.content });
}
// Handle tool calls - only accumulate during streaming, emit when complete
if (choice.delta?.tool_calls) {
for (const toolCall of choice.delta.tool_calls) {
const index = toolCall.index ?? 0;
// Get or create the tool call accumulator for this index
let accumulatedCall = this.streamingToolCalls.get(index);
if (!accumulatedCall) {
accumulatedCall = { arguments: '' };
this.streamingToolCalls.set(index, accumulatedCall);
}
// Update accumulated data
if (toolCall.id) {
accumulatedCall.id = toolCall.id;
}
if (toolCall.function?.name) {
accumulatedCall.name = toolCall.function.name;
}
if (toolCall.function?.arguments) {
accumulatedCall.arguments += toolCall.function.arguments;
}
}
}
// Only emit function calls when streaming is complete (finish_reason is present)
if (choice.finish_reason) {
for (const [, accumulatedCall] of this.streamingToolCalls) {
// TODO: Add back id once we have a way to generate tool_call_id from the VLLM parser.
// if (accumulatedCall.id && accumulatedCall.name) {
if (accumulatedCall.name) {
let args = {};
if (accumulatedCall.arguments) {
try {
args = JSON.parse(accumulatedCall.arguments);
}
catch (error) {