UNPKG

lynkr

Version:

Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.

461 lines (413 loc) 13.5 kB
/** * Model Tier Selector * Maps complexity scores to appropriate models per provider * Uses config/model-tiers.json for tier preferences */ const fs = require('fs'); const path = require('path'); const logger = require('../logger'); const config = require('../config'); // Load tier config const TIER_CONFIG_PATH = path.join(__dirname, '../../config/model-tiers.json'); // Phase 1.4: calibrated thresholds (written by scripts/calibrate-thresholds.js) const CALIBRATED_PATH = path.join(__dirname, '../../data/calibrated-thresholds.json'); // Tier definitions with complexity ranges (defaults; may be overridden by calibration) const TIER_DEFINITIONS = { SIMPLE: { description: 'Greetings, simple Q&A, confirmations', range: [0, 25], priority: 1, }, MEDIUM: { description: 'Code reading, simple edits, research', range: [26, 50], priority: 2, }, COMPLEX: { description: 'Multi-file changes, debugging, architecture', range: [51, 75], priority: 3, }, REASONING: { description: 'Complex analysis, security audits, novel problems', range: [76, 100], priority: 4, }, }; class ModelTierSelector { constructor() { this.tierConfig = null; this.localProviders = {}; this.providerAliases = {}; /** Per-tier ranges, possibly overridden by calibration. */ this.ranges = null; this._loadConfig(); this._loadCalibrated(); } /** * Load tier configuration from JSON file */ _loadConfig() { try { if (fs.existsSync(TIER_CONFIG_PATH)) { const data = JSON.parse(fs.readFileSync(TIER_CONFIG_PATH, 'utf8')); this.tierConfig = data.tiers || {}; this.localProviders = data.localProviders || {}; this.providerAliases = data.providerAliases || {}; logger.debug({ tiers: Object.keys(this.tierConfig) }, '[ModelTiers] Config loaded'); } else { logger.warn('[ModelTiers] Config file not found, using defaults'); this._loadDefaults(); } } catch (err) { logger.warn({ err: err.message }, '[ModelTiers] Config load failed, using defaults'); this._loadDefaults(); } } /** * Phase 1.4: load calibrated tier thresholds if the nightly job has produced them. * Falls back silently to TIER_DEFINITIONS when absent or malformed. */ _loadCalibrated() { this.ranges = this._defaultRanges(); try { if (!fs.existsSync(CALIBRATED_PATH)) return; const data = JSON.parse(fs.readFileSync(CALIBRATED_PATH, 'utf8')); if (!data?.ranges) return; const calibrated = {}; for (const tier of Object.keys(TIER_DEFINITIONS)) { const r = data.ranges[tier]; if (Array.isArray(r) && r.length === 2 && r[0] <= r[1]) { calibrated[tier] = r; } else { calibrated[tier] = TIER_DEFINITIONS[tier].range; } } this.ranges = calibrated; logger.info({ ranges: this.ranges, calibratedAt: data.calibratedAt }, '[ModelTiers] Using calibrated thresholds'); } catch (err) { logger.debug({ err: err.message }, '[ModelTiers] Calibrated thresholds load failed; using defaults'); } } _defaultRanges() { const ranges = {}; for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) { ranges[tier] = def.range.slice(); } return ranges; } /** * Load default tier config */ _loadDefaults() { this.tierConfig = { SIMPLE: { preferred: { ollama: ['llama3.2'], openai: ['gpt-4o-mini'] } }, MEDIUM: { preferred: { openai: ['gpt-4o'], anthropic: ['claude-sonnet-4-20250514'] } }, COMPLEX: { preferred: { openai: ['o1-mini'], anthropic: ['claude-sonnet-4-20250514'] } }, REASONING: { preferred: { openai: ['o1'], anthropic: ['claude-opus-4-20250514'] } }, }; this.localProviders = { ollama: { free: true, defaultTier: 'SIMPLE' }, llamacpp: { free: true, defaultTier: 'SIMPLE' }, lmstudio: { free: true, defaultTier: 'SIMPLE' }, }; } /** * Normalize provider name using aliases */ _normalizeProvider(provider) { if (!provider) return 'openai'; const lower = provider.toLowerCase(); return this.providerAliases[lower] || lower; } /** * Get tier from complexity score. * Phase 1.4: honors calibrated ranges when present. * @param {number} complexityScore - Score from 0-100 * @returns {string} Tier name (SIMPLE, MEDIUM, COMPLEX, REASONING) */ getTier(complexityScore) { const score = Math.max(0, Math.min(100, complexityScore || 0)); const ranges = this.ranges || this._defaultRanges(); for (const tier of Object.keys(TIER_DEFINITIONS)) { const [lo, hi] = ranges[tier]; if (score >= lo && score <= hi) return tier; } return score > 75 ? 'REASONING' : 'SIMPLE'; } /** * Phase 1.3: find a model with at least `minContext` context window. * Returns null when no qualifying model is available. */ findContextCapable(minContext, preferredTier = null) { const { getModelRegistrySync } = require('./model-registry'); const registry = getModelRegistrySync(); const tierOrder = preferredTier ? [preferredTier, 'REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE'] : ['REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE']; const seen = new Set(); for (const tier of tierOrder) { if (seen.has(tier)) continue; seen.add(tier); const tierConfig = this.tierConfig[tier]; if (!tierConfig?.preferred) continue; for (const [provider, models] of Object.entries(tierConfig.preferred)) { for (const model of models) { const cost = registry.getCost(model); if (cost?.context && cost.context >= minContext) { return { provider, model, tier, context: cost.context }; } } } } return null; } /** * Find a vision-capable model at or above `preferredTier`. * Walks tier order from preferred upward; returns null when none available. */ findVisionCapable(preferredTier = null) { const { getModelRegistrySync } = require('./model-registry'); const registry = getModelRegistrySync(); const tierOrder = preferredTier ? [preferredTier, 'COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE'] : ['COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE']; const seen = new Set(); for (const t of tierOrder) { if (seen.has(t)) continue; seen.add(t); const tierConfig = this.tierConfig[t]; if (!tierConfig?.preferred) continue; for (const [provider, models] of Object.entries(tierConfig.preferred)) { for (const model of models) { const info = registry.getCost(model); if (info?.vision) return { provider, model, tier: t }; } } } return null; } /** * Get tier definition */ getTierDefinition(tier) { return TIER_DEFINITIONS[tier] || TIER_DEFINITIONS.MEDIUM; } /** * Get tier priority (1-4) */ getTierPriority(tier) { return TIER_DEFINITIONS[tier]?.priority || 2; } /** * Compare two tiers, returns positive if tier1 > tier2 */ compareTiers(tier1, tier2) { return this.getTierPriority(tier1) - this.getTierPriority(tier2); } /** * Get preferred models for a tier and provider * @param {string} tier - Tier name * @param {string} provider - Provider name * @returns {string[]} Array of model names */ getPreferredModels(tier, provider) { const normalizedProvider = this._normalizeProvider(provider); return this.tierConfig[tier]?.preferred?.[normalizedProvider] || []; } /** * Select model for tier from TIER_* env var (mandatory) * @param {string} tier - Tier name (SIMPLE, MEDIUM, COMPLEX, REASONING) * @param {string} _unused - Deprecated parameter * @returns {Object} { model, provider, source, tier } */ selectModel(tier, _unused = null) { const tierConfig = config.modelTiers?.[tier]; if (!tierConfig) { throw new Error(`TIER_${tier} not configured. Set TIER_${tier}=provider:model in .env`); } const parsed = this._parseTierConfig(tierConfig); if (!parsed) { throw new Error(`Invalid TIER_${tier} format. Expected provider:model, got: ${tierConfig}`); } return { model: parsed.model, provider: parsed.provider, source: 'env_tier', tier, }; } /** * Parse tier config string (format: provider:model) * Examples: "ollama:llama3.2", "azure-openai:gpt-5.2-chat", "openai:gpt-4o" */ _parseTierConfig(configStr) { if (!configStr || typeof configStr !== 'string') return null; const colonIndex = configStr.indexOf(':'); if (colonIndex === -1) { // No colon - treat as model name, use default provider return { provider: config.modelProvider?.type || 'openai', model: configStr.trim(), }; } const provider = configStr.substring(0, colonIndex).trim().toLowerCase(); const model = configStr.substring(colonIndex + 1).trim(); if (!provider || !model) return null; return { provider, model }; } /** * Get the model configured for a provider from .env */ _getProviderModel(provider) { switch (provider) { case 'azure-openai': case 'azureopenai': return config.azureOpenAI?.deployment || null; case 'openai': return config.openai?.model || null; case 'ollama': return config.ollama?.model || null; case 'openrouter': return config.openrouter?.model || null; case 'llamacpp': return config.llamacpp?.model || null; case 'lmstudio': return config.lmstudio?.model || null; case 'bedrock': return config.bedrock?.modelId || null; case 'zai': return config.zai?.model || null; case 'moonshot': return config.moonshot?.model || null; case 'codex': return config.codex?.model || null; case 'vertex': return config.vertex?.model || null; case 'databricks': return config.modelProvider?.defaultModel || null; default: return null; } } /** * Get provider for a specific tier (from env or fallback) */ getProviderForTier(tier) { const tierConfig = config.modelTiers?.[tier]; if (tierConfig) { const parsed = this._parseTierConfig(tierConfig); if (parsed) return parsed.provider; } return config.modelProvider?.type || 'openai'; } /** * Get fallback model if provider can't handle requested tier */ _getFallbackModel(requestedTier, provider) { const tierOrder = ['REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE']; const startIndex = tierOrder.indexOf(requestedTier); // Try lower tiers for (let i = startIndex + 1; i < tierOrder.length; i++) { const fallbackTier = tierOrder[i]; const models = this.getPreferredModels(fallbackTier, provider); if (models.length > 0) { logger.debug({ from: requestedTier, to: fallbackTier, provider, model: models[0], }, '[ModelTiers] Downgrading tier'); return { model: models[0], tier: fallbackTier }; } } return null; } /** * Check if provider can handle a specific tier */ canHandleTier(provider, tier) { const normalizedProvider = this._normalizeProvider(provider); const models = this.getPreferredModels(tier, normalizedProvider); return models.length > 0; } /** * Check if provider is local/free */ isLocalProvider(provider) { const normalizedProvider = this._normalizeProvider(provider); return this.localProviders[normalizedProvider]?.free === true; } /** * Get all providers that can handle a tier */ getProvidersForTier(tier) { const tierConfig = this.tierConfig[tier]; if (!tierConfig?.preferred) return []; return Object.keys(tierConfig.preferred); } /** * Get all tiers a provider can handle */ getTiersForProvider(provider) { const normalizedProvider = this._normalizeProvider(provider); const tiers = []; for (const tier of Object.keys(TIER_DEFINITIONS)) { if (this.canHandleTier(normalizedProvider, tier)) { tiers.push(tier); } } return tiers; } /** * Get tier stats for metrics endpoint */ getTierStats() { const stats = { tiers: {}, providers: {}, }; for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) { const providers = this.getProvidersForTier(tier); stats.tiers[tier] = { ...def, providerCount: providers.length, providers: providers, }; } // Count models per provider const allProviders = new Set(); for (const tierConfig of Object.values(this.tierConfig)) { if (tierConfig.preferred) { Object.keys(tierConfig.preferred).forEach(p => allProviders.add(p)); } } for (const provider of allProviders) { stats.providers[provider] = { tiers: this.getTiersForProvider(provider), isLocal: this.isLocalProvider(provider), }; } return stats; } /** * Reload configuration (for hot reload) */ reload() { this._loadConfig(); logger.info('[ModelTiers] Configuration reloaded'); } } // Singleton instance let instance = null; function getModelTierSelector() { if (!instance) { instance = new ModelTierSelector(); } return instance; } module.exports = { ModelTierSelector, getModelTierSelector, TIER_DEFINITIONS, };