lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
53 lines (46 loc) • 1.64 kB
JavaScript
/**
* Deadline-aware routing (Phase 6.3).
*
* Reads the LYNKR-Deadline-Ms header from the request and filters out candidate
* models whose P95 latency exceeds the deadline. If the originally routed model
* is too slow, a faster qualifying alternative is chosen instead.
*/
const { getLatencyTracker } = require('./latency-tracker');
// Multiplier applied to a model's P95 latency before it is compared with the
// deadline: leaves a 20% safety margin against P95 estimates.
const SAFETY_FACTOR = 1.2;
/**
 * Extract the request deadline (milliseconds) from the LYNKR-Deadline-Ms header.
 *
 * HTTP header names are case-insensitive, so after the two common spellings
 * are tried directly, the header map is scanned for any other casing.
 *
 * @param {object|null|undefined} req - Request-like object exposing a
 *   `headers` map, or the header map itself.
 * @returns {number|null} The deadline as a positive finite number, or null
 *   when the header is absent, empty, non-numeric, zero, or negative.
 */
function getDeadlineMs(req) {
  if (!req) return null;
  const headers = req.headers || req;

  // Fast path: the spellings the original implementation recognised.
  let raw = headers['lynkr-deadline-ms'] || headers['LYNKR-Deadline-Ms'];

  if (!raw) {
    // Fallback: any other casing of the same header name (e.g. a plain
    // object keyed "Lynkr-Deadline-Ms") carrying a usable value.
    const match = Object.keys(headers).find(
      (name) => name.toLowerCase() === 'lynkr-deadline-ms' && headers[name],
    );
    if (match !== undefined) raw = headers[match];
  }

  if (!raw) return null;
  const deadline = Number(raw);
  return Number.isFinite(deadline) && deadline > 0 ? deadline : null;
}
/**
 * Check whether a routed model is fast enough for the deadline.
 *
 * The model's tracked P95 latency, scaled by SAFETY_FACTOR, must not exceed
 * the deadline. A model with no tracked P95 is assumed to fit.
 *
 * @param {*} provider - Provider identifier, forwarded to the latency tracker.
 * @param {*} model - Model identifier, forwarded to the latency tracker.
 * @param {number|null|undefined} deadlineMs - Deadline in milliseconds; a
 *   falsy value means "no deadline" and always fits.
 * @returns {boolean} True when the model is expected to meet the deadline.
 */
function fits(provider, model, deadlineMs) {
  if (!deadlineMs) return true;
  const p95 = getLatencyTracker().getModelP95(provider, model);
  // `== null` covers both null and undefined: an undefined P95 would otherwise
  // become NaN below and be reported as "too slow" instead of "unknown".
  if (p95 == null) return true; // unknown — assume yes
  return p95 * SAFETY_FACTOR <= deadlineMs;
}
/**
 * Pick the fastest model among candidates that meets the deadline.
 *
 * Each candidate's P95 latency is looked up in the latency tracker; when the
 * tracker has no value, `fallbackP95Ms` is assumed. A candidate is eligible
 * when there is no deadline or its P95 scaled by SAFETY_FACTOR fits within it.
 * On ties the earliest candidate wins.
 *
 * @param {Array<object>} candidates - Entries exposing `provider` and `model`.
 *   Null/undefined entries are skipped.
 * @param {number|null|undefined} deadlineMs - Deadline in milliseconds; a
 *   falsy value disables the deadline filter.
 * @param {number} [fallbackP95Ms=5000] - P95 assumed for models the tracker
 *   has no data for.
 * @returns {object|null} A shallow copy of the winning candidate with an added
 *   `p95` field, or null when `candidates` is not a non-empty array or no
 *   candidate is eligible.
 */
function chooseFastest(candidates, deadlineMs, fallbackP95Ms = 5000) {
  if (!Array.isArray(candidates) || candidates.length === 0) return null;
  const tracker = getLatencyTracker();
  let bestP95 = Infinity;
  let best = null;
  for (const candidate of candidates) {
    // Tolerate holes in the list instead of throwing on `candidate.provider`.
    if (candidate == null) continue;
    const p95 =
      tracker.getModelP95(candidate.provider, candidate.model) ?? fallbackP95Ms;
    const eligible = !deadlineMs || p95 * SAFETY_FACTOR <= deadlineMs;
    if (eligible && p95 < bestP95) {
      bestP95 = p95;
      best = { ...candidate, p95 };
    }
  }
  return best;
}
// Public API of the deadline-aware routing module.
module.exports = { getDeadlineMs, fits, chooseFastest, SAFETY_FACTOR };