UNPKG

lynkr

Version:

Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.

72 lines (65 loc) 1.94 kB
/** * Context-window validation for routed models. * * After tier selection, verifies the chosen model can hold the estimated * input tokens (plus response headroom). When it can't, callers should * escalate to a context-capable model. * * Phase 1.3 of the routing overhaul. * * @module routing/context-validator */ const logger = require('../logger'); const { getModelRegistrySync } = require('./model-registry'); /** Fraction of the context window reserved for the prompt (rest left for response). */ const HEADROOM_FRACTION = 0.85; function getContextLimit(model) { if (!model) return null; try { const registry = getModelRegistrySync(); const cost = registry.getCost(model); return cost?.context || null; } catch (err) { return null; } } /** * Quick yes/no fit check. * Unknown context windows return true (assume fits — we don't have data to reject). */ function fits(model, estimatedTokens, fraction = HEADROOM_FRACTION) { const ctx = getContextLimit(model); if (!ctx) return true; return estimatedTokens <= ctx * fraction; } /** * Detailed validation result. * @returns {{ ok: boolean, context: number|null, required: number, limit: number|null, reason?: string }} */ function validate(model, estimatedTokens) { const ctx = getContextLimit(model); if (!ctx) { return { ok: true, reason: 'unknown_context', context: null, required: estimatedTokens, limit: null, }; } const limit = Math.floor(ctx * HEADROOM_FRACTION); if (estimatedTokens <= limit) { return { ok: true, context: ctx, required: estimatedTokens, limit }; } logger.debug( { model, context: ctx, required: estimatedTokens, limit }, '[ContextValidator] Estimated tokens exceed model context' ); return { ok: false, context: ctx, required: estimatedTokens, limit }; } module.exports = { validate, fits, getContextLimit, HEADROOM_FRACTION, };