jay-code
Streamlined AI CLI orchestration engine with mathematical rigor and enterprise-grade reliability

/**
 * Utility functions for the multi-LLM provider system.
 */

import { ILogger } from '../core/logger.js';
import { ConfigManager } from '../config/config-manager.js';
import { ProviderManager, ProviderManagerConfig } from './provider-manager.js';
import { LLMProvider, LLMProviderConfig, FallbackStrategy } from './types.js';

/**
 * Create a provider manager with default configuration.
 */
export function createProviderManager(
  logger: ILogger,
  configManager: ConfigManager,
  customConfig?: Partial<ProviderManagerConfig>
): ProviderManager {
  const defaultConfig = getDefaultProviderConfig();
  const config = { ...defaultConfig, ...customConfig };

  // Load provider configs from environment
  config.providers = loadProviderConfigs(config.providers);

  return new ProviderManager(logger, configManager, config);
}
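
// Usage sketch (illustrative): `logger` and `configManager` are assumed to be
// instances constructed elsewhere in the application; only the override shape
// comes from this module.
//
//   const manager = createProviderManager(logger, configManager, {
//     defaultProvider: 'openai',
//     loadBalancing: { enabled: true, strategy: 'round-robin' },
//   });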

/**
 * Get default provider configuration.
 */
export function getDefaultProviderConfig(): ProviderManagerConfig {
  const defaultProvider = (process.env.DEFAULT_LLM_PROVIDER as LLMProvider) || 'anthropic';

  return {
    defaultProvider,
    providers: {
      anthropic: {
        provider: 'anthropic',
        apiKey: process.env.ANTHROPIC_API_KEY,
        model: 'claude-3-sonnet-20240229',
        temperature: 0.7,
        maxTokens: 4096,
        enableStreaming: true,
        enableCaching: true,
        timeout: 60000,
        retryAttempts: 3,
      },
      openai: {
        provider: 'openai',
        apiKey: process.env.OPENAI_API_KEY,
        model: 'gpt-4-turbo-preview',
        temperature: 0.7,
        maxTokens: 4096,
        enableStreaming: true,
        enableCaching: true,
        timeout: 60000,
        retryAttempts: 3,
      },
      google: {
        provider: 'google',
        apiKey: process.env.GOOGLE_AI_API_KEY,
        model: 'gemini-pro',
        temperature: 0.7,
        maxTokens: 2048,
        enableStreaming: true,
        enableCaching: true,
        timeout: 60000,
        retryAttempts: 3,
      },
      cohere: {
        provider: 'cohere',
        apiKey: process.env.COHERE_API_KEY,
        model: 'command',
        temperature: 0.7,
        maxTokens: 4096,
        enableStreaming: true,
        enableCaching: true,
        timeout: 60000,
        retryAttempts: 3,
      },
      ollama: {
        provider: 'ollama',
        apiUrl: process.env.OLLAMA_API_URL || 'http://localhost:11434',
        model: 'llama-2-7b',
        temperature: 0.7,
        maxTokens: 2048,
        enableStreaming: true,
        enableCaching: false,
        timeout: 120000, // Longer timeout for local models
        retryAttempts: 2,
      },
    },
    fallbackStrategy: getDefaultFallbackStrategy(),
    loadBalancing: {
      enabled: false,
      strategy: 'round-robin',
    },
    costOptimization: {
      enabled: true,
      maxCostPerRequest: 1.0, // $1 max per request
      preferredProviders: ['anthropic', 'openai'],
    },
    caching: {
      enabled: true,
      ttl: 3600, // seconds (1 hour)
      maxSize: 100, // MB
      strategy: 'lru',
    },
    monitoring: {
      enabled: true,
      metricsInterval: 60000, // ms (1 minute)
    },
  };
}
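
// For example, exporting DEFAULT_LLM_PROVIDER=openai before starting the CLI
// changes the default provider while keeping the per-provider defaults above.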

/**
 * Get default fallback strategy.
 */
function getDefaultFallbackStrategy(): FallbackStrategy {
  return {
    name: 'default',
    enabled: true,
    maxAttempts: 3,
    rules: [
      {
        condition: 'rate_limit',
        fallbackProviders: ['openai', 'google', 'cohere', 'ollama'],
        retryOriginal: true,
        retryDelay: 60000, // 1 minute
      },
      {
        condition: 'unavailable',
        fallbackProviders: ['openai', 'google', 'anthropic', 'cohere'],
        retryOriginal: true,
        retryDelay: 30000, // 30 seconds
      },
      {
        condition: 'timeout',
        fallbackProviders: ['anthropic', 'openai', 'cohere'],
        retryOriginal: false,
      },
      {
        condition: 'cost',
        fallbackProviders: ['ollama', 'cohere', 'google'],
        retryOriginal: false,
      },
      {
        condition: 'error',
        errorCodes: ['AUTHENTICATION', 'MODEL_NOT_FOUND'],
        fallbackProviders: [],
        retryOriginal: false, // Don't retry auth errors
      },
    ],
  };
}
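
// Illustrative sketch (not part of the public API): how a caller might pick
// fallback providers for a failure condition, skipping the provider that just
// failed. Field names match the FallbackStrategy rules above.
//
//   function resolveFallbackProviders(
//     strategy: FallbackStrategy,
//     condition: string,
//     failedProvider: LLMProvider
//   ): LLMProvider[] {
//     const rule = strategy.rules.find(r => r.condition === condition);
//     return rule ? rule.fallbackProviders.filter(p => p !== failedProvider) : [];
//   }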

/**
 * Load provider configurations from environment variables.
 */
function loadProviderConfigs(
  configs: Record<LLMProvider, LLMProviderConfig>
): Record<LLMProvider, LLMProviderConfig> {
  // Copy each config so environment overrides never mutate the caller's objects.
  const loaded = Object.fromEntries(
    Object.entries(configs).map(([provider, config]) => [provider, { ...config }])
  ) as Record<LLMProvider, LLMProviderConfig>;

  // Override with environment variables if present
  for (const [provider, config] of Object.entries(loaded)) {
    const envPrefix = `${provider.toUpperCase()}_`;

    // Read each variable into a local so TypeScript narrows string | undefined.
    const model = process.env[`${envPrefix}MODEL`];
    if (model) {
      config.model = model as LLMProviderConfig['model'];
    }
    const temperature = process.env[`${envPrefix}TEMPERATURE`];
    if (temperature) {
      config.temperature = parseFloat(temperature);
    }
    const maxTokens = process.env[`${envPrefix}MAX_TOKENS`];
    if (maxTokens) {
      config.maxTokens = parseInt(maxTokens, 10);
    }
    const apiUrl = process.env[`${envPrefix}API_URL`];
    if (apiUrl) {
      config.apiUrl = apiUrl;
    }
  }
  return loaded;
}
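
// Example: setting OPENAI_MODEL=gpt-4-turbo-preview or
// OLLAMA_API_URL=http://host:11434 in the environment overrides the
// corresponding defaults at startup.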

/**
 * Validate provider configuration.
 * Returns a list of human-readable errors; an empty array means the config is valid.
 */
export function validateProviderConfig(config: LLMProviderConfig): string[] {
  const errors: string[] = [];

  if (!config.provider) {
    errors.push('Provider name is required');
  }
  if (!config.model) {
    errors.push('Model is required');
  }
  if (config.temperature !== undefined && (config.temperature < 0 || config.temperature > 2)) {
    errors.push('Temperature must be between 0 and 2');
  }
  if (config.maxTokens !== undefined && (config.maxTokens < 1 || config.maxTokens > 100000)) {
    errors.push('Max tokens must be between 1 and 100000');
  }
  if (config.topP !== undefined && (config.topP < 0 || config.topP > 1)) {
    errors.push('Top-p must be between 0 and 1');
  }
  if (config.timeout !== undefined && (config.timeout < 1000 || config.timeout > 600000)) {
    errors.push('Timeout must be between 1000ms and 600000ms');
  }

  return errors;
}
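
// Usage sketch (illustrative): validate before constructing a provider. The
// cast is only there because the full required shape of LLMProviderConfig is
// defined in './types.js'.
//
//   const errors = validateProviderConfig({
//     provider: 'anthropic',
//     model: 'claude-3-sonnet-20240229',
//     temperature: 2.5, // out of range -> "Temperature must be between 0 and 2"
//   } as LLMProviderConfig);
//   if (errors.length > 0) throw new Error(errors.join('; '));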

/**
 * A recommended provider/model pair for a given use case.
 */
export interface ModelRecommendation {
  provider: LLMProvider;
  model: string;
  reasoning: string;
}

/**
 * Get model recommendations based on use case.
 * Unknown use cases fall back to the 'chat' recommendations.
 */
export function getModelRecommendations(useCase: string): ModelRecommendation[] {
  const recommendations: Record<string, ModelRecommendation[]> = {
    'code-generation': [
      {
        provider: 'anthropic',
        model: 'claude-3-opus-20240229',
        reasoning: 'Best for complex code generation with high accuracy',
      },
      {
        provider: 'openai',
        model: 'gpt-4-turbo-preview',
        reasoning: 'Excellent code generation with function calling support',
      },
    ],
    chat: [
      {
        provider: 'anthropic',
        model: 'claude-3-sonnet-20240229',
        reasoning: 'Balanced performance for conversational AI',
      },
      {
        provider: 'openai',
        model: 'gpt-3.5-turbo',
        reasoning: 'Fast and cost-effective for chat applications',
      },
    ],
    analysis: [
      {
        provider: 'anthropic',
        model: 'claude-3-opus-20240229',
        reasoning: 'Excellent for deep analysis and reasoning',
      },
      {
        provider: 'google',
        model: 'gemini-pro',
        reasoning: 'Good for data analysis with multimodal support',
      },
    ],
    local: [
      {
        provider: 'ollama',
        model: 'llama-2-13b',
        reasoning: 'Good balance of performance and resource usage for local deployment',
      },
      {
        provider: 'ollama',
        model: 'mistral-7b',
        reasoning: 'Fast local model with good performance',
      },
    ],
    budget: [
      {
        provider: 'ollama',
        model: 'llama-2-7b',
        reasoning: 'Free local model with no API costs',
      },
      {
        provider: 'google',
        model: 'gemini-pro',
        reasoning: 'Very cost-effective cloud model',
      },
    ],
  };

  return recommendations[useCase] ?? recommendations['chat'];
}
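
// Usage sketch (illustrative):
//
//   const [top] = getModelRecommendations('code-generation');
//   console.log(`${top.provider}:${top.model} (${top.reasoning})`);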

/**
 * Calculate estimated monthly cost based on usage.
 */
export function estimateMonthlyCost(
  provider: LLMProvider,
  model: string,
  estimatedRequests: number,
  avgTokensPerRequest: number
): {
  promptCost: number;
  completionCost: number;
  totalCost: number;
  currency: string;
} {
  // Get pricing from provider capabilities.
  // This is a simplified calculation.
  const pricing = getPricing(provider, model);
  if (!pricing) {
    return {
      promptCost: 0,
      completionCost: 0,
      totalCost: 0,
      currency: 'USD',
    };
  }

  const promptTokens = avgTokensPerRequest * 0.7; // Assume 70% prompt
  const completionTokens = avgTokensPerRequest * 0.3; // Assume 30% completion

  const promptCost = (promptTokens * estimatedRequests / 1000) * pricing.promptCostPer1k;
  const completionCost = (completionTokens * estimatedRequests / 1000) * pricing.completionCostPer1k;

  return {
    promptCost,
    completionCost,
    totalCost: promptCost + completionCost,
    currency: pricing.currency,
  };
}
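
// Worked example using the sample pricing below: 10,000 requests/month
// averaging 1,000 tokens each on openai:gpt-4-turbo-preview:
//   promptCost     = (700 * 10000 / 1000) * 0.01 = $70
//   completionCost = (300 * 10000 / 1000) * 0.03 = $90
//   totalCost      = $160/month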

/**
 * Get pricing for a specific provider and model.
 */
function getPricing(
  provider: LLMProvider,
  model: string
): {
  promptCostPer1k: number;
  completionCostPer1k: number;
  currency: string;
} | null {
  // This would typically come from provider capabilities;
  // the table below is simplified sample pricing for the example.
  const pricingData: Record<
    string,
    { promptCostPer1k: number; completionCostPer1k: number; currency: string }
  > = {
    'anthropic:claude-3-opus-20240229': {
      promptCostPer1k: 0.015,
      completionCostPer1k: 0.075,
      currency: 'USD',
    },
    'openai:gpt-4-turbo-preview': {
      promptCostPer1k: 0.01,
      completionCostPer1k: 0.03,
      currency: 'USD',
    },
    'google:gemini-pro': {
      promptCostPer1k: 0.00025,
      completionCostPer1k: 0.0005,
      currency: 'USD',
    },
    'ollama:llama-2-7b': {
      promptCostPer1k: 0,
      completionCostPer1k: 0,
      currency: 'USD',
    },
  };

  return pricingData[`${provider}:${model}`] ?? null;
}