UNPKG

@stackmemoryai/stackmemory

Version:

Lossless, project-scoped memory for AI coding tools. Durable context across sessions with 56 MCP tools, FTS5 search, conductor orchestrator, loop/watch monitoring, snapshot capture, pre-flight overlap checks, Claude/Codex/OpenCode wrappers, Linear sync, and more.

532 lines (531 loc) 14.3 kB
// ---------------------------------------------------------------------------
// Model router.
//
// Routes tasks to an LLM provider/model based on task type, prompt
// complexity, and sensitive-content detection. Configuration lives in
// ~/.stackmemory/model-router.json (validated against ModelRouterConfigSchema)
// with a short-lived in-process cache, and a fallback provider chain handles
// rate limits, errors, and timeouts.
// ---------------------------------------------------------------------------

import { fileURLToPath as __fileURLToPath } from 'url';
import { dirname as __pathDirname } from 'path';
const __filename = __fileURLToPath(import.meta.url);
const __dirname = __pathDirname(__filename);

import { existsSync, readFileSync } from "fs";
import { join } from "path";
import { homedir } from "os";
import { writeFileSecure, ensureSecureDir } from "../../hooks/secure-fs.js";
import { ModelRouterConfigSchema, parseConfigSafe } from "../../hooks/schemas.js";
import { isFeatureEnabled } from "../config/feature-flags.js";
import { scoreComplexity } from "./complexity-scorer.js";
import { detectSensitiveContent, isApprovedProvider } from "./sensitive-guard.js";

/** Known per-model context-window token limits, keyed by model identifier. */
const MODEL_TOKEN_LIMITS = {
  // Claude 4.x / 4.5 / 4.6
  "claude-opus-4-6": 2e5,
  "claude-sonnet-4-5-20250929": 2e5,
  "claude-haiku-4-5-20251001": 2e5,
  "claude-sonnet-4-20250514": 2e5,
  // Claude 3.x (legacy, still functional)
  "claude-3-5-sonnet-20241022": 2e5,
  "claude-3-5-haiku-20241022": 2e5,
  // OpenAI
  "gpt-4o": 128e3,
  "gpt-4o-mini": 128e3,
  "o3-mini": 2e5,
  "o4-mini": 2e5,
  // Qwen
  "qwen3-max-2025-01-23": 128e3,
  // Cerebras
  "llama-4-scout-17b-16e-instruct": 131072,
  // DeepInfra
  "THUDM/glm-4-9b-chat": 128e3,
};

/** Limit assumed when a model is not listed in MODEL_TOKEN_LIMITS. */
const DEFAULT_MODEL_TOKEN_LIMIT = 2e5;

/**
 * Look up the context-window token limit for a model.
 *
 * @param {string|undefined|null} model - Model identifier.
 * @returns {number} The known limit, or DEFAULT_MODEL_TOKEN_LIMIT when the
 *   model is falsy or unknown.
 */
function getModelTokenLimit(model) {
  if (!model) return DEFAULT_MODEL_TOKEN_LIMIT;
  return MODEL_TOKEN_LIMITS[model] ?? DEFAULT_MODEL_TOKEN_LIMIT;
}

/** On-disk location of the router configuration file. */
const CONFIG_PATH = join(homedir(), ".stackmemory", "model-router.json");

/** Baseline configuration used when no file exists or parsing fails. */
const DEFAULT_CONFIG = {
  enabled: false,
  defaultProvider: "anthropic",
  taskRouting: {},
  fallback: {
    enabled: true, // Fallback enabled by default
    provider: "qwen",
    onRateLimit: true,
    onError: true,
    onTimeout: true,
    maxRetries: 2,
    retryDelayMs: 1e3,
  },
  providers: {
    anthropic: {
      provider: "anthropic",
      model: "claude-sonnet-4-20250514",
      apiKeyEnv: "ANTHROPIC_API_KEY",
    },
    qwen: {
      provider: "qwen",
      model: "qwen3-max-2025-01-23",
      baseUrl: "https://dashscope.aliyuncs.com/compatible-mode/v1",
      apiKeyEnv: "DASHSCOPE_API_KEY",
      params: { enable_thinking: true, thinking_budget: 1e4 },
    },
    cerebras: {
      provider: "cerebras",
      model: "llama-4-scout-17b-16e-instruct",
      baseUrl: "https://api.cerebras.ai/v1",
      apiKeyEnv: "CEREBRAS_API_KEY",
    },
    deepinfra: {
      provider: "deepinfra",
      model: "THUDM/glm-4-9b-chat",
      baseUrl: "https://api.deepinfra.com/v1/openai",
      apiKeyEnv: "DEEPINFRA_API_KEY",
    },
    openrouter: {
      provider: "openrouter",
      model: "meta-llama/llama-4-scout",
      baseUrl: "https://openrouter.ai/api",
      apiKeyEnv: "OPENROUTER_API_KEY",
    },
    "anthropic-batch": {
      provider: "anthropic-batch",
      model: "claude-sonnet-4-5-20250929",
      apiKeyEnv: "ANTHROPIC_API_KEY",
    },
  },
  thinkingMode: {
    enabled: true,
    budget: 1e4,
    temperature: 0.6,
    topP: 0.95,
  },
};

// Config cache: avoids re-reading/re-validating the JSON file on every call.
let _configCache = null;
const CONFIG_CACHE_TTL_MS = 5e3;

/**
 * Load the router configuration from disk, merged over DEFAULT_CONFIG and
 * validated via parseConfigSafe. A missing or unreadable file deliberately
 * falls back to the defaults (best-effort config). Results are cached for
 * CONFIG_CACHE_TTL_MS.
 *
 * @returns {object} The effective router configuration.
 */
function loadModelRouterConfig() {
  const now = Date.now();
  if (_configCache && now < _configCache.expiresAt) {
    return _configCache.config;
  }
  let config;
  try {
    if (existsSync(CONFIG_PATH)) {
      const data = JSON.parse(readFileSync(CONFIG_PATH, "utf8"));
      config = parseConfigSafe(
        ModelRouterConfigSchema,
        { ...DEFAULT_CONFIG, ...data },
        DEFAULT_CONFIG,
        "model-router"
      );
    } else {
      config = { ...DEFAULT_CONFIG };
    }
  } catch {
    // Unreadable/corrupt config file: fall back to defaults (intentional).
    config = { ...DEFAULT_CONFIG };
  }
  _configCache = { config, expiresAt: now + CONFIG_CACHE_TTL_MS };
  return config;
}

/**
 * Persist the router configuration to CONFIG_PATH (best-effort) and
 * invalidate the in-process cache.
 *
 * @param {object} config - Configuration object to serialize.
 */
function saveModelRouterConfig(config) {
  try {
    ensureSecureDir(join(homedir(), ".stackmemory"));
    writeFileSecure(CONFIG_PATH, JSON.stringify(config, null, 2));
    _configCache = null;
  } catch (err) {
    // Best-effort persistence, but do not swallow the failure silently:
    // a lost write would otherwise be undiagnosable.
    console.warn(
      `[model-router] Failed to save config: ${err instanceof Error ? err.message : String(err)}`
    );
  }
}

/** Drop the cached configuration so the next load re-reads the file. */
function invalidateConfigCache() {
  _configCache = null;
}

/**
 * Resolve the provider config for a task type, honoring taskRouting and
 * falling back to the default provider.
 *
 * @param {string} taskType - Task category key.
 * @returns {object|null} Provider config, or null when routing is disabled
 *   or the provider is not configured.
 */
function getModelForTask(taskType) {
  const config = loadModelRouterConfig();
  if (!config.enabled) {
    return null;
  }
  const routedProvider = config.taskRouting[taskType];
  const provider = routedProvider || config.defaultProvider;
  return config.providers[provider] || null;
}

/**
 * Build the ANTHROPIC_* environment variables that point a Claude-compatible
 * client at the given provider/model.
 *
 * @param {object} modelConfig - Provider entry with model/apiKeyEnv/baseUrl.
 * @returns {Record<string, string>} Env vars; empty when the API key is
 *   missing from process.env (a warning is logged).
 */
function buildModelEnv(modelConfig) {
  const env = {};
  const apiKey = process.env[modelConfig.apiKeyEnv];
  if (!apiKey) {
    console.warn(`[model-router] API key not found: ${modelConfig.apiKeyEnv}`);
    return env;
  }
  env["ANTHROPIC_MODEL"] = modelConfig.model;
  env["ANTHROPIC_SMALL_FAST_MODEL"] = modelConfig.model;
  env["ANTHROPIC_AUTH_TOKEN"] = apiKey;
  if (modelConfig.baseUrl) {
    env["ANTHROPIC_BASE_URL"] = modelConfig.baseUrl;
  }
  return env;
}

/**
 * Heuristic: does the input look like a planning/architecture request?
 *
 * @param {string} input - User prompt text.
 * @returns {boolean} True when any planning pattern matches.
 */
function isPlanningContext(input) {
  const planPatterns = [
    /\bplan\b/i,
    /\barchitect/i,
    /\bdesign\b/i,
    /\bstrateg/i,
    /\bimplement.*approach/i,
    /\bhow.*should.*we/i,
    /\bthink.*through/i,
    /\breason.*about/i,
    /\banalyze.*options/i,
    /\btrade-?offs?/i,
  ];
  return planPatterns.some((pattern) => pattern.test(input));
}

/**
 * Heuristic: does the input explicitly ask for deep/careful reasoning?
 *
 * @param {string} input - User prompt text.
 * @returns {boolean} True when any deep-thinking pattern matches.
 */
function requiresDeepThinking(input) {
  const thinkPatterns = [
    /\bcomplex/i,
    /\bdifficult/i,
    /\btricky/i,
    /\bcareful/i,
    /\bstep.*by.*step/i,
    /\bthink.*hard/i,
    /\bultrathink/i,
    /\b--think/i,
    /\b--think-hard/i,
  ];
  return thinkPatterns.some((pattern) => pattern.test(input));
}

/** Hard-coded best provider per task type (used when its API key exists). */
const OPTIMAL_ROUTING = {
  linting: {
    provider: "deepinfra",
    model: "THUDM/glm-4-9b-chat",
    apiKeyEnv: "DEEPINFRA_API_KEY",
    baseUrl: "https://api.deepinfra.com/v1/openai",
  },
  context: {
    provider: "deepinfra",
    model: "THUDM/glm-4-9b-chat",
    apiKeyEnv: "DEEPINFRA_API_KEY",
    baseUrl: "https://api.deepinfra.com/v1/openai",
  },
  code: {
    provider: "cerebras",
    model: "llama-4-scout-17b-16e-instruct",
    apiKeyEnv: "CEREBRAS_API_KEY",
    baseUrl: "https://api.cerebras.ai/v1",
  },
  testing: {
    provider: "cerebras",
    model: "llama-4-scout-17b-16e-instruct",
    apiKeyEnv: "CEREBRAS_API_KEY",
    baseUrl: "https://api.cerebras.ai/v1",
  },
  review: {
    provider: "anthropic",
    model: "claude-sonnet-4-5-20250929",
    apiKeyEnv: "ANTHROPIC_API_KEY",
  },
  plan: {
    provider: "anthropic",
    model: "claude-sonnet-4-5-20250929",
    apiKeyEnv: "ANTHROPIC_API_KEY",
  },
  think: {
    provider: "anthropic",
    model: "claude-sonnet-4-5-20250929",
    apiKeyEnv: "ANTHROPIC_API_KEY",
  },
};

/** Order in which configured providers are tried when routing falls through. */
const FALLBACK_CHAIN = ["deepinfra", "cerebras", "anthropic"];

/** Low-cost providers, tried in order for low-complexity tasks. */
const CHEAP_PROVIDERS = [
  {
    provider: "openrouter",
    model: "meta-llama/llama-4-scout",
    apiKeyEnv: "OPENROUTER_API_KEY",
    baseUrl: "https://openrouter.ai/api",
  },
  {
    provider: "deepinfra",
    model: "THUDM/glm-4-9b-chat",
    apiKeyEnv: "DEEPINFRA_API_KEY",
    baseUrl: "https://api.deepinfra.com/v1/openai",
  },
  {
    provider: "cerebras",
    model: "llama-4-scout-17b-16e-instruct",
    apiKeyEnv: "CEREBRAS_API_KEY",
    baseUrl: "https://api.cerebras.ai/v1",
  },
];

/**
 * Select the best provider for a task, considering (in order): the
 * multiProvider feature flag, sensitive-content detection, an explicit
 * caller preference, complexity tier, OPTIMAL_ROUTING, and FALLBACK_CHAIN.
 *
 * @param {string} taskType - Task category key (see OPTIMAL_ROUTING).
 * @param {string} [preference] - Caller-preferred provider name.
 * @param {{task: string, context?: string}} [complexityInput] - Text used
 *   for complexity scoring and sensitivity checks.
 * @returns {object} Provider descriptor {provider, model, apiKeyEnv[, baseUrl]}.
 */
function getOptimalProvider(taskType, preference, complexityInput) {
  const defaultResult = {
    provider: "anthropic",
    model: "claude-sonnet-4-5-20250929",
    apiKeyEnv: "ANTHROPIC_API_KEY",
  };
  if (!isFeatureEnabled("multiProvider")) {
    return defaultResult;
  }
  // Sensitive content always stays on the default (approved) provider.
  if (complexityInput) {
    const sensitiveCheck = detectSensitiveContent(
      complexityInput.task,
      complexityInput.context
    );
    if (sensitiveCheck.sensitive) {
      return defaultResult;
    }
  }
  if (preference) {
    // NOTE(review): when complexityInput is present, the check above has
    // already returned for sensitive content, so this re-check is redundant
    // defense-in-depth. Kept as-is pending confirmation that
    // detectSensitiveContent is side-effect free.
    if (!isApprovedProvider(preference) && complexityInput) {
      const check = detectSensitiveContent(
        complexityInput.task,
        complexityInput.context
      );
      if (check.sensitive) {
        return defaultResult;
      }
    }
    const config = loadModelRouterConfig();
    const providerConfig = config.providers[preference];
    if (providerConfig && process.env[providerConfig.apiKeyEnv]) {
      return {
        provider: preference,
        model: providerConfig.model,
        baseUrl: providerConfig.baseUrl,
        apiKeyEnv: providerConfig.apiKeyEnv,
      };
    }
  }
  if (complexityInput) {
    const complexity = scoreComplexity(
      complexityInput.task,
      complexityInput.context
    );
    // Low complexity: prefer the cheapest available provider.
    if (complexity.tier === "low") {
      const cheap = findAvailableCheapProvider();
      if (cheap) return cheap;
    }
    // High complexity: always use the default (strongest) provider.
    if (complexity.tier === "high") {
      return defaultResult;
    }
  }
  const route = OPTIMAL_ROUTING[taskType];
  if (route && process.env[route.apiKeyEnv]) {
    return { ...route };
  }
  // No direct route available: walk the fallback chain for any provider
  // that is both configured and has its API key set.
  const fallbackConfig = loadModelRouterConfig();
  for (const provider of FALLBACK_CHAIN) {
    const providerConfig = fallbackConfig.providers[provider];
    if (providerConfig && process.env[providerConfig.apiKeyEnv]) {
      return {
        provider,
        model: providerConfig.model,
        baseUrl: providerConfig.baseUrl,
        apiKeyEnv: providerConfig.apiKeyEnv,
      };
    }
  }
  return defaultResult;
}

/**
 * Find the first entry in CHEAP_PROVIDERS whose API key is present.
 *
 * @returns {object|null} Provider descriptor, or null when none available.
 */
function findAvailableCheapProvider() {
  for (const p of CHEAP_PROVIDERS) {
    if (process.env[p.apiKeyEnv]) {
      return {
        provider: p.provider,
        model: p.model,
        apiKeyEnv: p.apiKeyEnv,
        baseUrl: p.baseUrl,
      };
    }
  }
  return null;
}

/**
 * Route with complexity scoring and return the chosen provider annotated
 * with the scored complexity tier.
 *
 * @param {string} taskType - Task category key.
 * @param {string} task - Task text.
 * @param {string} [context] - Additional context text.
 * @returns {object} Provider descriptor plus {complexity: tier}.
 */
function getComplexityRoutedProvider(taskType, task, context) {
  const complexity = scoreComplexity(task, context);
  const provider = getOptimalProvider(taskType, undefined, { task, context });
  return { ...provider, complexity: complexity.tier };
}

/**
 * Stateful router: tracks the current provider and fallback mode across
 * calls, and produces the env-var overrides for each routed task.
 */
class ModelRouter {
  config;
  currentProvider;
  inFallbackMode = false;
  fallbackReason;

  constructor() {
    this.config = loadModelRouterConfig();
    this.currentProvider = this.config.defaultProvider;
  }

  /**
   * Route a task to the appropriate model
   */
  route(taskType, input) {
    if (!this.config.enabled) {
      return { provider: "anthropic", env: {}, switched: false };
    }
    // For "default" tasks, sniff the input text to upgrade to plan/think.
    let detectedType = taskType;
    if (input && taskType === "default") {
      if (isPlanningContext(input)) {
        detectedType = "plan";
      } else if (requiresDeepThinking(input)) {
        detectedType = "think";
      }
    }
    const modelConfig = getModelForTask(detectedType);
    if (!modelConfig) {
      return { provider: "anthropic", env: {}, switched: false };
    }
    const switched = modelConfig.provider !== this.currentProvider;
    this.currentProvider = modelConfig.provider;
    return {
      provider: modelConfig.provider,
      env: buildModelEnv(modelConfig),
      switched,
    };
  }

  /**
   * Get current provider
   */
  getCurrentProvider() {
    return this.currentProvider;
  }

  /**
   * Force switch to a specific provider
   */
  switchTo(provider) {
    const modelConfig = this.config.providers[provider];
    if (!modelConfig) {
      console.warn(`[model-router] Provider not configured: ${provider}`);
      return {};
    }
    this.currentProvider = provider;
    return buildModelEnv(modelConfig);
  }

  /**
   * Reset to default provider
   */
  reset() {
    this.currentProvider = this.config.defaultProvider;
    this.inFallbackMode = false;
    this.fallbackReason = undefined;
  }

  /**
   * Check if fallback is enabled and configured
   */
  isFallbackEnabled() {
    if (!this.config.fallback?.enabled) return false;
    const fallbackProvider = this.config.providers[this.config.fallback.provider];
    if (!fallbackProvider) return false;
    const apiKey = process.env[fallbackProvider.apiKeyEnv];
    return !!apiKey;
  }

  /**
   * Activate fallback mode
   */
  activateFallback(reason) {
    if (!this.isFallbackEnabled()) {
      console.warn("[model-router] Fallback not available");
      return {};
    }
    const fallbackProvider = this.config.fallback.provider;
    const modelConfig = this.config.providers[fallbackProvider];
    if (!modelConfig) {
      console.warn(
        `[model-router] Fallback provider not configured: ${fallbackProvider}`
      );
      return {};
    }
    this.inFallbackMode = true;
    this.fallbackReason = reason;
    this.currentProvider = fallbackProvider;
    console.log(
      `[model-router] Fallback activated: ${reason} -> ${fallbackProvider}`
    );
    return buildModelEnv(modelConfig);
  }

  /**
   * Get fallback configuration
   */
  getFallbackConfig() {
    return this.config.fallback;
  }

  /**
   * Check if currently in fallback mode
   */
  isInFallbackMode() {
    return this.inFallbackMode;
  }

  /**
   * Get reason for fallback
   */
  getFallbackReason() {
    return this.fallbackReason;
  }

  /**
   * Get fallback environment variables (for pre-configuring)
   */
  getFallbackEnv() {
    if (!this.isFallbackEnabled()) return {};
    const fallbackProvider = this.config.fallback.provider;
    const modelConfig = this.config.providers[fallbackProvider];
    if (!modelConfig) return {};
    return buildModelEnv(modelConfig);
  }
}

// Module-level singleton instance.
let routerInstance = null;

/**
 * Get (lazily creating) the process-wide ModelRouter singleton.
 *
 * @returns {ModelRouter} The shared router instance.
 */
function getModelRouter() {
  if (!routerInstance) {
    routerInstance = new ModelRouter();
  }
  return routerInstance;
}

/**
 * Whether fallback routing is enabled, configured, and has its API key set.
 *
 * @returns {boolean}
 */
function isFallbackAvailable() {
  const router = getModelRouter();
  return router.isFallbackEnabled();
}

/**
 * Summarize the fallback state for diagnostics.
 *
 * @returns {{enabled: boolean, provider: string|null, hasApiKey: boolean,
 *   inFallback: boolean, reason?: string}}
 */
function getFallbackStatus() {
  const router = getModelRouter();
  const config = loadModelRouterConfig();
  if (!config.fallback?.enabled) {
    return { enabled: false, provider: null, hasApiKey: false, inFallback: false };
  }
  const fallbackProvider = config.providers[config.fallback.provider];
  const hasApiKey = fallbackProvider
    ? !!process.env[fallbackProvider.apiKeyEnv]
    : false;
  return {
    enabled: true,
    provider: config.fallback.provider,
    hasApiKey,
    inFallback: router.isInFallbackMode(),
    reason: router.getFallbackReason(),
  };
}

/**
 * Manually activate fallback mode on the singleton router.
 *
 * @param {string} [reason="manual"] - Why fallback was triggered.
 * @returns {Record<string, string>} Env vars for the fallback provider.
 */
function triggerFallback(reason = "manual") {
  const router = getModelRouter();
  return router.activateFallback(reason);
}

/** Reset the singleton router to its default provider and clear fallback. */
function resetFallback() {
  const router = getModelRouter();
  router.reset();
}

export {
  DEFAULT_MODEL_TOKEN_LIMIT,
  MODEL_TOKEN_LIMITS,
  ModelRouter,
  buildModelEnv,
  getComplexityRoutedProvider,
  getFallbackStatus,
  getModelForTask,
  getModelRouter,
  getModelTokenLimit,
  getOptimalProvider,
  invalidateConfigCache,
  isFallbackAvailable,
  isPlanningContext,
  loadModelRouterConfig,
  requiresDeepThinking,
  resetFallback,
  saveModelRouterConfig,
  triggerFallback,
};